In my initial analysis of the freshwater-saltwater dataset, I had three vcf files: 1. one generated from all pairwise comparisons of populations, containing SNPs only found in all 16 populations, in 75% of individuals, and with a minor allele frequency of at least 5%. (“separate”) 2. one generated from all pairwise comparisons of populations, containing SNPs found in 4 populations, in 75% of individuals, and with a minor allele frequency of at least 5%. (“P4”) 3. one generated from comparing lumped ‘freshwater’ and ‘saltwater’ populations, containing SNPs found in 50% of individuals and with a minor allele frequency of at least 5%. (“lumped”)

I did the majority of the analyses using set #3, but would like to explore what changes if I use dataset #2. I think #1 is too restrictive. Dataset #2 is in the “subset” dataset, and is what I ran the structure analyses on. This is the dataset I’m going to move forward with for the paper.

Note that in most of these cases the actual analysis will be set to eval=FALSE once I’ve run it once, because then I save the output and only have to read it in, saving compilation time.

source("../../gwscaR/R/gwscaR.R")
source("../../gwscaR/R/gwscaR_plot.R")
source("../../gwscaR/R/gwscaR_utility.R")
source("../../gwscaR/R/gwscaR_fsts.R")
source("../../gwscaR/R/gwscaR_popgen.R")
source("../scripts/002_treemix_plotting_funcs.R")#I've modified these functions
library(knitr)
pop.list<-c("TXSP","TXCC","TXFW","TXCB","LAFW","ALST","ALFW","FLSG","FLKB",
    "FLFD","FLSI","FLAB","FLPB","FLHB","FLCC","FLLG")
pop.labs<-c("TXSP","TXCC","TXFW","TXCB","LAFW","ALST","ALFW","FLSG","FLKB",
            "FLFD","FLSI","FLAB","FLPB","FLHB","FLCC","FLFW")
fw.list<-c("TXFW","LAFW","ALFW","FLLG")
sw.list<-c("TXSP","TXCC","TXCB","ALST","FLSG","FLKB",
    "FLFD","FLSI","FLAB","FLPB","FLHB","FLCC")
lgs<-c("LG1","LG2","LG3","LG4","LG5","LG6","LG7","LG8","LG9","LG10","LG11",
    "LG12","LG13","LG14","LG15","LG16","LG17","LG18","LG19","LG20","LG21",
    "LG22")
lgn<-seq(1,22)
all.colors<-c(rep("black",2),"#2166ac","black","#2166ac","black","#2166ac",
        rep("black",8),"#2166ac")
#grp.colors<-c('#e41a1c','#377eb8','#4daf4a','#984ea3','#ffff33','#f781bf')
grp.colors<-c('#762a83','#af8dc3','#e7d4e8','#d9f0d3','#7fbf7b','#1b7837')
pwise.fst.all<-read.table("stacks/fwsw_fst_summary.txt",header=T,row.names=1,sep='\t')
    #pwise.fst.all<-rbind(pwise.fst.all,rep(NA,ncol(pwise.fst.all)))
    rownames(pwise.fst.all)<-pop.labs
    colnames(pwise.fst.all)<-pop.labs
pwise.fst.sub<-read.table("stacks/fwsw_fst_summary_subset.txt",header=T,row.names=1,sep='\t')
  colnames(pwise.fst.sub)<-pop.labs
  rownames(pwise.fst.sub)<-pop.labs
print(paste("Average Pairwise Fst is ",mean(pwise.fst.sub[upper.tri(pwise.fst.sub)]),sep=""))
ped.sub<-read.table("stacks/subset.ped",header=F)   
ped.sub$V1<-gsub("sample_(\\w{4})\\w+.*","\\1",ped.sub$V2)
map.sub<-read.table("stacks/subset.map",header = F,stringsAsFactors = F)
map.sub$Locus<-paste(gsub("(\\d+)_\\d+","\\1",map.sub$V2),(as.numeric(map.sub$V4)+1),sep=".")
colnames(ped.sub)<-c("Pop","IID","","","","Phenotype","","",map.sub$Locus)

This P4/subset dataset has 9820 SNPs from 9820 RAD loci, from 698 indivudals in 16 populations.

Generate a new vcf file

The first thing to do is to create a vcf file using the subset parameters. I’ve already got a whitelist of loci in the subsetted dataset, so I need to run populations -b 2 -W fwsw_results/subset.whitelist.txt -P fwsw_results/stacks -M fwsw_pops_map.txt --vcf, which I did on 2017-12-18 on silivren-lond. I then re-named it to p4.vcf (and the other output files).

vcf<-parse.vcf("p4.upd.vcf") #this is the smaller dataset
vcf$SNP<-paste(vcf$`#CHROM`,vcf$POS,sep=".")
scaffs<-levels(as.factor(vcf[,1]))
scaffs[1:22]<-lgs
scaff.starts<-tapply(vcf$POS,vcf$`#CHROM`,max)
scaff.starts<-data.frame(rbind(cbind(names(scaff.starts),scaff.starts)),stringsAsFactors = F)
locus.info<-c(colnames(vcf[1:9]),"SNP")

The vcf file contains 9638 SNPs from 9638 RAD loci.

Choose a subset of the SNPs to re-use. [Do I need to do this?]

chosen.snps<-choose.one.snp(vcf)$SNP
write.table(chosen.snps,"chosen.all.snps.txt",quote=F)
chosen.snps<-unlist(read.table("chosen.all.snps.txt"))

There are 2546 SNPs that I’ll use from the vcf file, from 9638 RAD loci.

Figure 1

The first figure in the paper is a map of the collection sites.

jpeg("all_sites_map.jpg", res=300, height=7,width=14, units="in")
pdf("all_sites_map.pdf",height=7,width=14)
par(oma=c(0,0,0,0),mar=c(0,0,0,0),pin=c(7,7))
map("worldHires", "usa",xlim=c(-100,-76), ylim=c(24,32), 
    col="gray90", mar=c(0,0,0,0),fill=TRUE, res=300,myborder=0)
map("worldHires", "mexico",xlim=c(-100,-76), ylim=c(24,32), 
    col="gray95", fill=TRUE, add=TRUE)
points(mar.coor$lon, mar.coor$lat,  col="black", cex=2, pch=19)
points(-1*fw.coor$lon, fw.coor$lat,  col="cornflowerblue", cex=2, pch=18)
abline(h=c(25,30,35),lty=3)
abline(v=c(-80,-85,-90,-95,-100),lty=3)
text(x=c(-99.5,-99.5),y=c(25,30),c("25N","30N"),cex=1.75)
text(x=c(-80,-85,-90,-95),y=rep(31.8,4),c("80W","85W","90W","95W"),cex=1.75)
text(y=26,x=-90,"Gulf of Mexico",cex=1.75)
text(y=25.5,x=-98.5,"Mexico",cex=1.75)
text(x=-91,y=31,"USA",cex=1.75)
text(x=-78,y=29.5,"Atlantic Ocean",cex=1.75)
text(x=-96.5,y=26,"TXSP",font=2,cex=1.75)
text(x=-96.7,y=27.2,"TXCC",font=2,cex=1.75)
text(x=-96,y=28.3,"TXFW",font=2,col="cornflowerblue",cex=1.75)
text(x=-94.7,y=29,"TXCB",font=2,cex=1.75)
text(x=-90.2,y=30.3,"LAFW",font=2,col="cornflowerblue",cex=1.75)
text(x=-88,y=30,"ALST",font=2,cex=1.75)
text(x=-87,y=30.75,"ALFW",font=2,col="cornflowerblue",cex=1.75)
text(x=-85,y=29.4,"FLSG",font=2,cex=1.75)
text(x=-83.5,y=29.2,"FLKB",font=2,cex=1.75)
text(x=-83.2,y=27.6,"FLFD",font=2,cex=1.75)
text(x=-82.2,y=26,"FLSI",font=2,cex=1.75)
text(x=-80,y=24.8,"FLAB",font=2,cex=1.75)
text(x=-79.3,y=26.8,"FLPB",font=2,cex=1.75)
text(x=-79.5,y=27.2,"FLHB",font=2,cex=1.75)
text(x=-80.2,y=28,"FLCC",font=2,cex=1.75)
text(x=-80.9,y=29.5,"FLFW",font=2,col="cornflowerblue",cex=1.75)
dev.off()
Figure 1. Map of collection sites

Figure 1. Map of collection sites

Figure 2

The second figure in the paper is showing population structure, using STRUCTURE, adegenet, and PCAdapt. These analyses were run as exactly written in 002_fwsw_analysis.R, so I won’t reproduce that code here.

Now for a combined figure:

dapc1<-readRDS("dapc1.RDS")
ind.names<-rownames(dapc1$ind.coord)
pop<-substr(ind.names, 8,11)
colors<-pop
colors[colors %in% sw.list]<-"black"
colors[colors %in% fw.list]<-"#2166ac"
pop.pch<-pop
pop.pch[pop.pch=="TXSP"]<-"0"
pop.pch[pop.pch=="TXCC"]<-"1"
pop.pch[pop.pch=="TXFW"]<-"21"
pop.pch[pop.pch=="TXCB"]<-"2"
pop.pch[pop.pch=="LAFW"]<-"24"
pop.pch[pop.pch=="ALST"]<-"3"
pop.pch[pop.pch=="ALFW"]<-"23"
pop.pch[pop.pch=="FLSG"]<-"4"
pop.pch[pop.pch=="FLKB"]<-"5"
pop.pch[pop.pch=="FLFD"]<-"6"
pop.pch[pop.pch=="FLSI"]<-"7"
pop.pch[pop.pch=="FLAB"]<-"9"
pop.pch[pop.pch=="FLPB"]<-"10"
pop.pch[pop.pch=="FLHB"]<-"11"
pop.pch[pop.pch=="FLCC"]<-"12"
pop.pch[pop.pch=="FLLG"]<-"22"
#pop plot info
ppi<-data.frame(Pop=pop.labs,cols = all.colors,pch=c(0,1,21,2,24,3,23,4,5,6,7,9,10,11,12,22))

da<-data.frame(Individual=rownames(dapc1$ind.coord),Pop=substr(rownames(dapc1$ind.coord),8,11),
               LD1=dapc1$ind.coord[,1],LD2=dapc1$ind.coord[,2],
               LD3=dapc1$ind.coord[,3],LD4=dapc1$ind.coord[,4],LD5=dapc1$ind.coord[,5],
               Group=dapc1$grp, GrpName=names(dapc1$grp),
               stringsAsFactors = F) #include grpname to check
da$Pop[da$Pop == "FLLG"]<-"FLFW"
da$colors<-da$Pop
for(i in 1:nrow(da)){
  da[i,"colors"]<-as.character(ppi[ppi$Pop %in% da[i,"Pop"],"cols"])
}
da$pch<-da$Pop
for(i in 1:nrow(da)){
  da[i,"pch"]<-as.numeric(ppi[ppi$Pop %in% da[i,"Pop"],"pch"])
}
npop<-length(pop.list)
pseq<-1:npop
m<-matrix(c(1:32,rep(33,7),rep(34,7),rep(0,2),
            rep(35,7),rep(36,7),rep(0,2)),
          nrow=4,ncol=npop,byrow = T)
jpeg("combinedStructure.jpeg",res=300,height=8,width=8,units="in")
layout(mat=m,heights=c(1,1,6,6))
#STRUCTURE
par(oma=c(1.5,3.5,1,1),mar=c(1,0,0,0))
plotting.structure(structure.k2,2,pop.list, make.file=FALSE, xlabcol = all.colors,plot.new=F,
                   colors=grp.colors[c(1,6)],xlabel=F,
                   ylabel=expression(atop(italic(K)==2,358.9)))
plotting.structure(structure.k6,2,pop.labs, make.file=FALSE,
                   plot.new=F,colors=grp.colors,xlabel=T,
                   xlabcol = all.colors,
                   ylabel=expression(atop(italic(K)==6,326.1)))
#PCADAPT
par(mar=c(2,2,2,2))
plot(pa$scores[,1],pa$scores[,2],col=alpha(pap$cols,0.5),bg=alpha(pap$cols,0.75),
     pch=as.numeric(pap$pch),   cex=2,bty="L",xlab="",ylab="",cex.axis=1.5)
#points(pa$scores[grp=="freshwater",1],pa$scores[grp=="freshwater",2],
#       col=alpha(pap$cols[pap$grp=="freshwater"],0.5),
#       bg=alpha(pap$cols[pap$grp=="freshwater"],0.75),pch=as.numeric(pap$pch[pap$grp=="freshwater"]),
#       cex=2) #to highlight the fw pops
mtext(paste("PC1 (",pa.props[1],"%)",sep=""),1,line = 2.5,cex=1)
mtext(paste("PC2 (",pa.props[2],"%)",sep=""),2,line = 2.5,cex=1)

plot(pa$scores[,3],pa$scores[,4],col=alpha(pap$cols,0.5),bg=alpha(pap$cols,0.75),pch=as.numeric(pap$pch),
     cex=2, bty="L",xlab="",ylab="",cex.axis=1.5)
#points(pa$scores[grp=="freshwater",3],pa$scores[grp=="freshwater",4],
#     col=alpha(pap$cols[pap$grp=="freshwater"],0.5),
#     bg=alpha(pap$cols[pap$grp=="freshwater"],0.75),pch=as.numeric(pap$pch[pap$grp=="freshwater"]),
#     cex=2)
mtext(paste("PC3 (",pa.props[3],"%)",sep=""),1,line = 2.5,cex=1)
mtext(paste("PC4 (",pa.props[4],"%)",sep=""),2,line = 2.5,cex=1)

# plot(pa$scores[,5],pa$scores[,6],col=alpha(pap$cols,0.5),bg=alpha(pap$cols,0.75),pch=as.numeric(pap$pch),
#      cex=1.5,bty="L",xlab="",ylab="")
# points(pa$scores[grp=="freshwater",5],pa$scores[grp=="freshwater",6],
#        col=alpha(pap$cols[pap$grp=="freshwater"],0.5),
#        bg=alpha(pap$cols[pap$grp=="freshwater"],0.75),pch=as.numeric(pap$pch[pap$grp=="freshwater"]),
#        cex=1.5)
# mtext(paste("PC5 (",pa.props[5],"%)",sep=""),1,line = 2,cex=0.75)
# mtext(paste("PC6 (",pa.props[6],"%)",sep=""),2,line = 2,cex=0.75)

# ADEGENET
par(mar=c(2,2,2,2))
plot(da$LD1,da$LD2,col=alpha(da$colors,0.5),pch=as.numeric(da$pch),cex=2,lwd=1.3,
     bg=alpha(da$colors,0.75),xlab="",ylab="",xlim=c(-10,25),ylim=c(-10,15),bty="n",cex.axis=1.5)
par(lwd=3,bty='n')
s.class(dapc1$ind.coord,fac=factor(da$Group), clabel=0,cstar=0,cellipse=2.5,
        addaxes = F,pch="",grid=F,axesel=F,add.plot = T,col=grp.colors[c(1,5,6,3,2,4)],xlim=c(-10,25),ylim=c(-10,15))
mtext(paste("DAPC 1 (", round(dapc1$eig[1]/sum(dapc1$eig)*100, 2), "%)", sep=""),
      1, line = 2.5,cex=1)
mtext(paste("DAPC 2 (", round(dapc1$eig[2]/sum(dapc1$eig)*100, 2), "%)", sep=""),
      2, line = 2.1,cex=1)


plot(da$LD3,da$LD4,col=alpha(da$colors,0.5),pch=as.numeric(da$pch),cex=2,lwd=1.3,
     bg=alpha(da$colors,0.75),xlab="",ylab="",xlim=c(-10,10),ylim=c(-10,5),cex.axis=1.5,bty="n")
par(lwd=3,bty='n')
s.class(dapc1$ind.coord[,3:4],fac=factor(da$Group), clabel=0,cstar=0,cellipse=2.5,
        addaxes = F,pch="",grid=F,axesel=F,add.plot = T,col=grp.colors[c(1,5,6,3,2,4)],xlim=c(-10,10),ylim=c(-10,5))
mtext(paste("DAPC 3 (", round(dapc1$eig[3]/sum(dapc1$eig)*100, 2), "%)", sep=""),
      1, line = 2.5,cex=1)
mtext(paste("DAPC 4 (", round(dapc1$eig[4]/sum(dapc1$eig)*100, 2), "%)", sep=""),
      2, line = 2.1,cex=1)

# plot(da$LD4,da$LD5,col=alpha(da$colors,0.5),pch=as.numeric(da$pch),cex=2,lwd=1.3,
#      bg=alpha(colors,0.25),xlab="",ylab="",xlim=c(-5,15),ylim=c(-10,5))
# par(lwd=3,bty='n')
# s.class(dapc1$ind.coord[,4:5],fac=factor(da$Group), clabel=0,cstar=0,cellipse=2.5,
#         addaxes = F,pch="",grid=F,axesel=F,add.plot = T,col=grp.colors[c(3,5,6,4,2,1)],xlim=c(-5,15),ylim=c(-10,5))
# mtext(paste("DAPC 4 (", round(dapc1$eig[4]/sum(dapc1$eig)*100, 2), "%)", sep=""),
#       1, line = 2,cex=0.75)
# mtext(paste("DAPC 5 (", round(dapc1$eig[5]/sum(dapc1$eig)*100, 2), "%)", sep=""),
#       2, line = 2,cex=0.75)

par(fig = c(0, 1, 0, 1), oma=c(2,1,0,1), mar = c(0, 0, 0, 0), new = TRUE,
    cex=1,lwd=1.3)
plot(0, 0, type = "n", bty = "n", xaxt = "n", yaxt = "n")
legend(x=0.75,y=0.5, legend=ppi$Pop, pch=as.numeric(ppi$pch), pt.cex=1.5,cex=1,
       col=alpha(ppi$cols, 0.5),pt.bg=alpha(ppi$cols,0.75), ncol=1,bty='n')
#add delta K
text(x=-1.02,y=0.7,srt=90,labels = expression(Delta~italic(K)==""))
dev.off()
Figure 2. Population Structure

Figure 2. Population Structure

Figure 3

I don’t need to re-calculate pairwise Jost’s D, and Fsts using the P4 (or “subset”) dataset, so I can just read in those files. But I do need to run Treemix and PopTree2.

Treemix

To run treemix, I follow the following steps:

  1. Fit tree without migration
  2. Add migration edges using -m.
  3. Use f3 and f4 ancestry estimation to approximate the amount of admixture and compare to treemix.
  4. Use f4 statistics to understand poor fits.

All of these require setting a root, which is FLPB based on previous trees.

First, I need to create a file in the correct format, which uses the vcf file:

treemix.name<-"treemix/p4_treemix"
treemix.prefix<-"treemix/p4_"
poporder.file<-"treemix/poporder"
fst.tree.name<-as.character("ALLfst_cov_heatmap.png")
tm.fwsw<-treemix.from.vcf(vcf,pop.list)
write.table(tm.fwsw,treemix.name,col.names=TRUE,row.names=FALSE,quote=F,sep=' ')
#then in unix: gzip -c treemix.name > treemix.name.gz

Then, in unix, I need to run gzip -c treemix/p4_treemix > treemix/p4_treemix.gz. Now I can run scripts/run_treemix.sh, which implements steps 1 and 2, and which I need to run in Ubuntu. Note that there are a lot of “no counts” warnings from treemix. Also, that it runs very quickly

After that, I can evaluate the different outcomes.

setwd("treemix")
source("../../scripts/002_treemix_plotting_funcs.R")#I've modified these functions
poporder<-c("TXSP","TXCC","TXFW","TXCB","LAFW","ALST",
            "ALFW","FLSG","FLKB","FLFD","FLSI","FLAB",
            "FLPB","FLHB","FLCC","FLLG")
colors<-poporder
colors[colors %in% "FLLG"]<-grp.colors[6]
colors[colors %in% c("FLPB","FLHB","FLCC")]<-grp.colors[6]
colors[colors %in% c("FLAB")]<-grp.colors[5]
colors[colors %in% c("FLSI","FLFD","FLKB","FLSG")]<-grp.colors[3]
colors[colors %in% c("ALST","ALFW","LAFW")]<-grp.colors[2]
colors[colors %in% c("TXSP","TXCC","TXFW","TXCB")]<-grp.colors[1]
write.table(cbind(poporder,colors),"poporder",quote=F,sep='\t')
setwd("../")
m0<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBr",sep=""),poporder,split=c(1,1,3,2),more=TRUE)
m1<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBrm1",sep=""),poporder,split=c(2,1,3,2),more=TRUE)
m2<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBrm2",sep=""),poporder,split=c(3,1,3,2),more=TRUE)
m3<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBrm3",sep=""),poporder,split=c(1,2,3,2),more=TRUE)
m4<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBrm4",sep=""),poporder,split=c(2,2,3,2),more=TRUE)
m5<-treemix.cov.plot(paste(treemix.prefix,"k100bFLPBrm5",sep=""),poporder,split=c(3,2,3,2),more=FALSE)

par(mfrow=c(2,3))
r0<-plot_resid(paste(treemix.prefix,"k100bFLPBr",sep=""),poporder.file)
r1<-plot_resid(paste(treemix.prefix,"k100bFLPBrm1",sep=""),poporder.file)
r2<-plot_resid(paste(treemix.prefix,"k100bFLPBrm2",sep=""),poporder.file)
r3<-plot_resid(paste(treemix.prefix,"k100bFLPBrm3",sep=""),poporder.file)
r4<-plot_resid(paste(treemix.prefix,"k100bFLPBrm4",sep=""),poporder.file)
r5<-plot_resid(paste(treemix.prefix,"k100bFLPBrm5",sep=""),poporder.file)

Population pairs that are ‘too far apart’ on the tree (have high error estimates) are ones that are likely candidates for gene flow - and these are the squares with dark greens, blues, and black. These are LAFW-TXFW and ALFW-TXFW in almost all of the SE graphs

par(mfrow=c(2,3),mar=c(1,1,1,1),oma=c(1,1,1,1))
t0<-plot_tree(paste(treemix.prefix,"k100bFLPBr",sep=""),plotmig = F,plus=0.05,scale=F,mbar=F)
t1<-plot_tree(paste(treemix.prefix,"k100bFLPBrm1",sep=""),plus=0.05,scale=F,mbar=F)
t2<-plot_tree(paste(treemix.prefix,"k100bFLPBrm2",sep=""),plus=0.05,scale=F,mbar=F)
t3<-plot_tree(paste(treemix.prefix,"k100bFLPBrm3",sep=""),plus=0.05,scale=F,mbar=F)
t4<-plot_tree(paste(treemix.prefix,"k100bFLPBrm4",sep=""),plus=0.05,scale=F,mbar=F)
t5<-plot_tree(paste(treemix.prefix,"k100bFLPBrm5",sep=""),plus=0.05,scale=F,mbar=F)

Look at the p-values: do migration events always improve the fit of the data?

tree0<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBr.treeout.gz",sep="")), as.is  = T, comment.char = "", quote = "")
tree1<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBrm1.treeout.gz",sep="")), as.is  = T, comment.char = "", quote = "",skip=1)
tree2<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBrm2.treeout.gz",sep="")), as.is  = T, comment.char = "", quote = "",skip=1)
tree3<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBrm3.treeout.gz",sep="")), as.is  = T, comment.char = "", quote = "",skip=1)
tree4<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBrm4.treeout.gz",sep="")), as.is  = T, comment.char = "", quote = "",skip=1)
tree5<-read.table(gzfile(paste(treemix.prefix,"k100bFLPBrm5.treeout.gz",sep="")), as.is  = T, comment.char = "", quote = "",skip=1)

tree1[,4]
## [1] 9.18188e-12
tree2[,4]
## [1] "6.61839e-05"   "<2.22507e-308"
tree3[,4]
## [1] "1.02604e-05"   "<2.22507e-308" "1.88599e-10"
tree4[,4]
## [1] "<2.22507e-308" "<2.22507e-308" "<2.22507e-308" "2.89325e-05"
tree5[,4]
## [1] "6.55032e-15"   "4.94959e-09"   "<2.22507e-308" "0.00690872"   
## [5] "<2.22507e-308"

All of the added migration edges improve the fit. Evaluating both the residual and migration edge plots, we can see that four migration edges reduces the error between LAFW & TXFW and ALFW & TXFW. The largest SE is between FLFW (FLLG) and itself, and secondarily between LAFW and itself. This means that those branches are surprisingly long, which I am comfortable accepting. The strongest migration edge, between the branch from the Atlantic Florida to Gulf Florida populations -> FLAB, is unsurprising because that is an intermediate location between the Atlantic and Gulf coasts. In some of the structure analyses it clusters with the Gulf coast and in others it is its own admixed group - supporting the tree structure here.

The other migration events are somewhat more surprising. We will evaluate those using the three and four population analyses.

Evaluating migration edges

First, let’s read in the three and four population analysis results. These tested all possible three- and four-population tree groups to see whether they pass a test of ‘treeness’. If not, their p-value will be small, and they don’t form a nice tree.

f3.name<-"treemix/p4_threepop.txt"
f4.name<-"treemix/p4_fourpop.txt"

#extract the relevant lines
extract.fs<-function(filename,pat="^[A-Z]{4};"){
  f<-readLines(filename)
  fmatch<-f[grep(pat,f)]
  f<-read.table(text=fmatch)
  colnames(f)<-c("pops","f","SE","Z")
  return(f)
}
f3<-extract.fs(f3.name)
f4<-extract.fs(f4.name,pat="^[A-Z]{4},")

#add p-values
f3$p<-pnorm(f3$Z)
f4$p<-pnorm(f4$Z)

Now we can investigate the migration edges added to the trees.

Atlantic/Gulf->FLAB

This least surprising migration edge would be expected to show a signature of admixture. I’ll check the treeness of FLHB;FLSI,FLAB and FLHB,FLCC;FLSI,FLAB. For comparison, I can look at FLHB;FLSI,FLFD and FLHB,FLCC;FLSI,FLFD

checks3<-c("FLHB;FLSI,FLAB","FLHB;FLFD,FLSI")
f3[f3$pops %in% checks3,]
##                pops         f          SE       Z p
## 1583 FLHB;FLFD,FLSI 0.0250745 0.000545858 45.9359 1
## 1625 FLHB;FLSI,FLAB 0.0224990 0.000508579 44.2390 1
checks4<-c("FLSI,FLAB;FLHB,FLCC","FLFD,FLSI;FLHB,FLCC")
f4[f4$pops %in% checks4,]
##                     pops            f          SE        Z            p
## 5379 FLFD,FLSI;FLHB,FLCC -0.000159989 6.74083e-05 -2.37343 8.811867e-03
## 5425 FLSI,FLAB;FLHB,FLCC -0.000470815 8.68360e-05 -5.42188 2.948773e-08

They pass the three-population tests but not the four-population tests.

TXFW->TXCB

To investigate this migration edge, I need to evaluate whether a [TXFW[TXCB,TXCC]] or [TXFW[TXCB,TXSP]] topology is an appropriate tree.

checks<-c("TXFW;TXCC,TXCB",
          "TXFW;TXSP,TXCB")
f3[f3$pops %in% checks,]
##               pops         f         SE       Z p
## 44  TXFW;TXSP,TXCB 0.0294602 0.00227983 12.9221 1
## 318 TXFW;TXCC,TXCB 0.0304362 0.00230450 13.2073 1

Neither of these fail the treeness test, so TXFW is essentially functioning as an effective outgroup/more ancestral population. Let’s check if a four population tree also makes sense.

f4[f4$pops %in% "TXSP,TXCC;TXFW,TXCB",]
##                  pops           f          SE       Z p
## 2 TXSP,TXCC;TXFW,TXCB 0.000976003 0.000160092 6.09653 1

This also passes the treeness test - so this isn’t an actual admixture event, more of demonstrating shared ancestry.

TXFW->LAFW/ALFW

This migration edge would suggest a three-population structure of [TXFW[ALFW,LAFW]] or a four-population structure of [TXFW,TXCB[LAFW,ALFW]] or [TXFW,ALST[LAFW,ALFW]]

checks<-c("TXFW,TXCB;LAFW,ALFW",
          "TXFW,ALST;LAFW,ALFW")#to get the correct order of populations I had to
                                #use grep and manually try a few different ones
f4[f4$pops %in% checks,]
##                     pops           f          SE        Z            p
## 2463 TXFW,TXCB;LAFW,ALFW -0.00140534 0.000542958 -2.58830 4.822547e-03
## 2657 TXFW,ALST;LAFW,ALFW -0.00416643 0.000526403 -7.91491 1.237160e-15
f3[f3$pops == "TXFW;LAFW,ALFW",]
##               pops         f         SE       Z p
## 630 TXFW;LAFW,ALFW 0.0349272 0.00105283 33.1745 1

The three-population test does not fail the treeness test but the four-population tests do fail the treeness tests. This suggests that TXFW functions as an outgroup to the LAFW and ALFW - they are perhaps derived from the TXFW population rather than experiencing current admixture.

FLFW->TX/AL

FLLG is actually FLFW, and there’s a migration edge from it to the Alabama/Louisiana clade and the Texas clade. So, we can test FLLG;ALST,TXFW, FLLG;ALFW,TXCB, FLLG;ALFW,TXFW or FLLG;ALST,TXCC - see if the migration is specifically to freshwater populations or not.

checks<-c("FLLG;TXFW,ALST","FLLG;TXCB,ALFW","FLLG;TXCB,LAFW",#mix of freshwater
          "FLLG;TXFW,ALFW","FLLG;TXFW,LAFW",                 #all freshwater
          "FLLG;TXCC,ALST")                                  #no freshwater
f3[f3$pops %in% checks,]
##               pops         f         SE       Z p
## 452 FLLG;TXCC,ALST 0.0843051 0.00214158 39.3658 1
## 655 FLLG;TXFW,LAFW 0.0897111 0.00256943 34.9147 1
## 686 FLLG;TXFW,ALST 0.0833365 0.00215944 38.5917 1
## 713 FLLG;TXFW,ALFW 0.0928283 0.00279453 33.2179 1
## 853 FLLG;TXCB,LAFW 0.0857054 0.00220127 38.9345 1
## 911 FLLG;TXCB,ALFW 0.0874172 0.00219662 39.7963 1

These all pass the treeness test.

Plot the chosen tree

treemix.file<-as.character("treemix/p4_k100bFLPBr.cov.gz")
tm.vertices<-"treemix/p4_k100bFLPBrm4.vertices.gz"
tm.plot<-"treemix/p4_treemix_m4_FLPB.png"
tm.tree<-"treemix/p4_k100bFLPBrm4"

PopTree2

For the PopTree2 analysis, I need to convert the vcf file to genepop format. I did this using PGDSpider2. Then I ran PopTree2 on Windows10, using Da to calculate neighbor-joining trees and using 1000 bootstrap replicates.

Or, if that doesn’t work,

remove.missing.data<-function(vcf, pop.list){
  exclude<-NULL
  for(i in 1:nrow(vcf))
  {
    vcf.row<-vcf[i,colnames(vcf) != "SNP"]#remove this if it exists
    missingness<-unlist(lapply(pop.list,function(pop){
      pop.vcf<-vcf.row[,grep(pop,colnames(vcf.row))]
      missing<-length(grep("\\.\\/\\.",pop.vcf))
      prop.missing<-missing/length(pop.vcf)
      return(prop.missing)
    }))
    if(length(missingness[missingness==1])>0){
      print(paste("Row ", i, " is has no data for pop ", pop.list[which(missingness==1)]))
      exclude<-c(exclude,i)
    } 
  }
  if(!is.null(exclude)){
    return(vcf[-exclude,])
  }else{
    return(vcf)
  }
}
gpop.name<-"poptree/p4.genepop"
sub.prefix<-"poptree/p4_"
vcf<-remove.missing.data(vcf, pop.list)
for(i in 1:10){
  rowsub<-sample(nrow(vcf),1000,replace = FALSE)
  gpopsub<-vcf2gpop(vcf[rowsub,colnames(vcf)!="SNP"],pop.list,paste(sub.prefix,i,".genepop",sep=""))
}
gpop<-vcf2gpop(vcf[,colnames(vcf)!="SNP"],pop.list,gpop.name)
#then run poptree on all of them

And then run poptree. Poptree ran on the full dataset as well as the subsets of 1000 SNPs. Did they all provide similar results? What does the consensus tree look like?

poptree.dir<-"poptree/"
poptree.prefix<-"p4"
poptree.png<-"p4.poptree.png"
poptree.files<-list.files(path = poptree.dir,pattern=paste(poptree.prefix,".*.nwk",sep=""))
poptree.files<-lapply(poptree.files,function(x){ paste("poptree",x,sep="/")})
poptrees<-lapply(poptree.files,read.tree)
con.poptree<-consensus(poptrees)
con.poptree$tip.label[con.poptree$tip.label=="FLLG"]<-"FLFW"

clcolr <- rep("black", dim(con.poptree$edge)[1])
#clcolr[c(12,13,14,24)]<-all.colors[3]
#png(paste(poptree.dir,poptree.prefix,".consensus.png",sep=""),height=7,width=7,units="in",res=300)
#dev.off()
png(paste(poptree.dir,poptree.prefix,".png",sep=""),height=10,width=10,units="in",res=300)
par(mfrow=c(3,4),oma=c(1,1,1,1),mar=c(1,1,1,1))
for(i in 1:length(poptrees)){
  plot.phylo(poptrees[[i]],cex=1.5)
  mtext(poptree.files[i],3)
}
plot.phylo(con.poptree,tip.color = c(rep(grp.colors[6],4),grp.colors[5],
                                     rep(grp.colors[1],4),rep(grp.colors[2],3),
                                     rep(grp.colors[3],4)),
           edge.color = clcolr,edge.width = 2,cex=1,font=1)
mtext("Consensus")
dev.off()
## png 
##   2
All Trees

All Trees

Based on this, I’m going to move forward just with the full dataset (which includes bootstrap values).

#just the full subset tree
pt.subtree<-poptrees[[1]]
pt.subtree$tip.label[pt.subtree$tip.label=="FLLG"]<-"FLFW"
pt.colors<-pt.subtree$tip.label
pt.colors[pt.colors %in% "FLFW"]<-grp.colors[6]
pt.colors[pt.colors %in% c("FLPB","FLHB","FLCC")]<-grp.colors[6]
pt.colors[pt.colors %in% c("FLAB")]<-grp.colors[5]
pt.colors[pt.colors %in% c("FLSI","FLFD","FLKB","FLSG")]<-grp.colors[3]
pt.colors[pt.colors %in% c("ALST","ALFW","LAFW")]<-grp.colors[2]
pt.colors[pt.colors %in% c("TXSP","TXCC","TXFW","TXCB")]<-grp.colors[1]

clcolr <- rep("black", dim(pt.subtree$edge)[1])
clcolr[c(6,19,20,21,22)]<-all.colors[3]
png(poptree.png,height=7,width=7,units="in",res=300)
plot.phylo(pt.subtree,tip.color = pt.colors,
           edge.color = clcolr,edge.width = 2,label.offset = 0.0015)
dev.off()
## png 
##   2
PopTree

PopTree

Make Fig 3.

To get the Poptree distance matrix, I copied and pasted the distance matrix in the p4.out file into a new file and saved it as p4.distance.out. I had to first save each part of the matrix as text files, open them in Excel to standardize the spacings, merge them, and then save them.

pt.dist<-as.matrix(read.table("poptree/p4.distance.out",header=T,row.names=1,sep='\t'))
jostpw<-as.matrix(read.table("Subset.JostsD.tsv",header=T,sep='\t'))
pwise.fst.raw<-read.table("stacks/fwsw_fst_summary_subset.txt",header=T,row.names=1,sep='\t')#p4_fst_summary.txt
pwise.fst<-matrix(nrow=length(pop.list),ncol=length(pop.list))
dimnames(pwise.fst)[[1]]<-dimnames(pwise.fst)[[2]]<-pop.list
for(pop1 in 1:length(pop.list)){
  for(pop2 in 1:length(pop.list)){
    if(pop1 != pop2){
      raws<-c(pwise.fst.raw[pop.list[pop1],pop.list[pop2]],pwise.fst.raw[pop.list[pop2],pop.list[pop1]])
      pwise.fst[pop.list[pop1],pop.list[pop2]]<-raws[!is.na(raws)]
    }
  }
}
dimnames(pwise.fst)[[1]]<-dimnames(pwise.fst)[[2]]<-pop.labs
pwise.fst[lower.tri(pwise.fst)]<-NA
heatmaps.name<-"p4.heatmap.png"
colors<-c("blue","yellow","red")
pal<-colorRampPalette(colors)
ncol=80
cols<-pal(ncol)
cov<-read.table(gzfile(treemix.file), as.is = T, head = T, quote = "", comment.char = "")
#reorder
covplot <- data.frame(matrix(nrow = nrow(cov), ncol = ncol(cov)))
for(i in 1:length(pop.list)){
  for( j in 1:length(pop.list)){
    
    covplot[i, j] = cov[which(names(cov)==pop.list[i]), which(names(cov)==pop.list[j])]
    rownames(covplot)[i]<-pop.list[i]
    colnames(covplot)[j]<-pop.list[j]
  }
}
cp<-as.matrix(covplot)
cp[lower.tri(cp)]<-NA
cp[upper.tri(cp)]<-covplot[upper.tri(covplot)]
colnames(cp)<-pop.labs
rownames(cp)<-pop.labs
cp.lv<-levelplot(cp,col.regions=cols,alpha.regions=0.7,
          scales = list(x=list(rot=90),tck = 0),xlab="",ylab="")
dimnames(pt.dist)[[1]]<-dimnames(pt.dist)[[2]]<-pop.labs
dimnames(jostpw)[[1]]<-dimnames(jostpw)[[2]]<-pop.labs
colors<-c("black","darkgrey","grey","lightgrey","cornflowerblue")
pal<-colorRampPalette(colors)
ncol=80
cols<-pal(ncol)
rev.colors<-c("cornflowerblue","lightgrey","grey","darkgrey","black")
rev.pal<-colorRampPalette(rev.colors)
rev.cols<-rev.pal(ncol)

hm.height<-list(x=3.8,units="in")#2.2
hm.width<-list(x=3.9,units="in")#2.4 in RStudio
png(heatmaps.name,height=11,width=11,units="in",res=300)
fst.lv<-levelplot(as.matrix(pwise.fst),col.regions=cols,alpha.regions=0.7,
                  scales = list(x=list(rot=90),tck = 0),xlab="",ylab="")
print(fst.lv,split=c(1,1,2,2),more=TRUE,panel.width=hm.width,
      panel.height=hm.height,cex=2)
trellis.focus("legend", side="right", clipp.off=TRUE, highlight=FALSE)
grid.text(expression(italic(F)[ST]), 0.2, 0, hjust=0.5, vjust=1.2,gp=gpar(cex=0.75))
trellis.unfocus()

cp.lv<-levelplot(cp,col.regions=rev.cols,alpha.regions=0.7,
                 scales = list(x=list(rot=90),tck = 0),xlab="",ylab="")
print(cp.lv,split=c(1,2,2,2),more=FALSE,newpage=FALSE,panel.width=hm.width,
      panel.height=hm.height,cex=2)
trellis.focus("legend", side="right", clipp.off=TRUE, highlight=FALSE)
grid.text("covariance", 0.2, 0, hjust=0.5, vjust=1.2,gp=gpar(cex=0.75))
trellis.unfocus()

jost.lv<-levelplot(jostpw,col.regions=cols,alpha.regions=0.7,
                   scales = list(x=list(rot=90),tck = 0),xlab="",ylab="")
print(jost.lv,split=c(2,1,2,2),more=FALSE,newpage=FALSE,panel.width=hm.width,
      panel.height=hm.height,cex=2)
trellis.focus("legend", side="right", clipp.off=TRUE, highlight=FALSE)
grid.text(expression("Jost's"~italic(D)), 0.2, 0, hjust=0.5, vjust=1.2,gp=gpar(cex=0.75))
trellis.unfocus()

ptdist.lv<-levelplot(pt.dist,col.regions=cols,alpha.regions=0.7,
                     scales = list(x=list(rot=90),tck = 0),xlab="",ylab="")
print(ptdist.lv,split=c(2,2,2,2),more=FALSE,newpage=FALSE,panel.width=hm.width,
      panel.height=hm.height,cex=2)
trellis.focus("legend", side="right", clipp.off=TRUE, highlight=FALSE)
grid.text("PopTree2\nDistance", 0.2, 0, hjust=0.5, vjust=1.2,gp=gpar(cex=0.75))
trellis.unfocus()

dev.off()
Figure 3. Heatmap

Figure 3. Heatmap

Figure 4

Figure 4 is the Poptree2 and Treemix trees next to each other

plotName<-"p4.trees.png"
tm.tree<-"treemix/p4_k100bFLPBrm4"
pt.subtree<-read.tree("poptree/p4.nwk")
rect.start<-0.1
clcolr <- rep("grey27", dim(pt.subtree$edge)[1])
clcolr[c(6,19,20,21,22)]<-all.colors[3]
pt.subtree$node.label<-round(as.numeric(pt.subtree$node.label)*100)
pt.subtree$tip.label[pt.subtree$tip.label=="FLLG"]<-"FLFW"
png(plotName,height=5,width=10,units="in",res=300)
par(mfrow=c(1,2),oma=c(1,0,1,0),mar=c(1,1,1,0.1))
plot.phylo(pt.subtree,tip.color = pt.colors,#align.tip.label = T,#show.node.label = TRUE,
           edge.color = clcolr,edge.width = 2,label.offset = 0.0015,font=1)
nodelabels(pt.subtree$node.label,cex=0.75,font=2,frame="none",adj=c(1,-0.2))
mtext("PopTree2",3)
t3<-plot_tree(tm.tree,"treemix/poporder",plus=0.05,scale=F,mbar=F,arrow=0.1,
              tip.order = tip.names,lwd = 2,branch.cols = branch.cols,xlab=F)
##     V1   V2       V3      V4      V5  V6  V7 V8  V9 V10
## 1    1 FLSG NOT_ROOT NOT_MIG     TIP  16  NA NA  NA  NA
## 2    2 <NA> NOT_ROOT NOT_MIG NOT_TIP  52  51  1  16   8
## 3    3 TXCC NOT_ROOT NOT_MIG     TIP 172  NA NA  NA  NA
## 4    4 FLFD NOT_ROOT NOT_MIG     TIP 256  NA NA  NA  NA
## 5   15 FLPB NOT_ROOT NOT_MIG     TIP 473  NA NA  NA  NA
## 6   16 <NA> NOT_ROOT NOT_MIG NOT_TIP   2   1  1 304   7
## 7   31 FLLG NOT_ROOT NOT_MIG     TIP 412  NA NA  NA  NA
## 8   32 <NA> NOT_ROOT NOT_MIG NOT_TIP 473  52 12 212   3
## 9   51 FLKB NOT_ROOT NOT_MIG     TIP   2  NA NA  NA  NA
## 10  52 <NA> NOT_ROOT NOT_MIG NOT_TIP  32   2  9 256   3
## 11  75 TXFW NOT_ROOT NOT_MIG     TIP 104  NA NA  NA  NA
## 12  76 <NA> NOT_ROOT NOT_MIG NOT_TIP 304 472  2 303   1
## 13 103 ALFW NOT_ROOT NOT_MIG     TIP 472  NA NA  NA  NA
## 14 104 <NA> NOT_ROOT NOT_MIG NOT_TIP 304  75  1 356   3
## 15 135 FLAB NOT_ROOT NOT_MIG     TIP 588  NA NA  NA  NA
## 16 136 <NA> NOT_ROOT     MIG NOT_TIP  32  52 NA  NA  NA
## 17 171 TXSP NOT_ROOT NOT_MIG     TIP 172  NA NA  NA  NA
## 18 172 <NA> NOT_ROOT NOT_MIG NOT_TIP 356   3  1 171   1
## 19 211 FLHB NOT_ROOT NOT_MIG     TIP 212  NA NA  NA  NA
## 20 212 <NA> NOT_ROOT NOT_MIG NOT_TIP  32 211  1 412   2
## 21 255 FLSI NOT_ROOT NOT_MIG     TIP 588  NA NA  NA  NA
## 22 256 <NA> NOT_ROOT NOT_MIG NOT_TIP  52   4  1 588   2
## 23 303 ALST NOT_ROOT NOT_MIG     TIP  76  NA NA  NA  NA
## 24 304 <NA> NOT_ROOT NOT_MIG NOT_TIP  16 104  4  76   3
## 25 355 TXCB NOT_ROOT NOT_MIG     TIP 356  NA NA  NA  NA
## 26 356 <NA> NOT_ROOT NOT_MIG NOT_TIP 104 172  2 355   1
## 27 411 FLCC NOT_ROOT NOT_MIG     TIP 412  NA NA  NA  NA
## 28 412 <NA> NOT_ROOT NOT_MIG NOT_TIP 212 411  1  31   1
## 29 471 LAFW NOT_ROOT NOT_MIG     TIP 472  NA NA  NA  NA
## 30 472 <NA> NOT_ROOT NOT_MIG NOT_TIP  76 471  1 103   1
## 31 473 <NA>     ROOT NOT_MIG NOT_TIP 473  15  1  32  15
## 32 490 <NA> NOT_ROOT     MIG NOT_TIP 104  75 NA  NA  NA
## 33 546 <NA> NOT_ROOT     MIG NOT_TIP 412  31 NA  NA  NA
## 34 588 <NA> NOT_ROOT NOT_MIG NOT_TIP 256 255  1 135   1
## 35 641 <NA> NOT_ROOT     MIG NOT_TIP 104  75 NA  NA  NA
##                                                                                                                                                                                                                                                                                                                                                                                                                              V11
## 1                                                                                                                                                                                                                                                                                                                                                                                                               FLSG:0.000248367
## 2                                                                                                                                                                                 (FLKB:0.000474655,(FLSG:0.000248367,((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792):0.000664825):0.00127358
## 3                                                                                                                                                                                                                                                                                                                                                                                                               TXCC:0.000281411
## 4                                                                                                                                                                                                                                                                                                                                                                                                               FLFD:0.000926406
## 5                                                                                                                                                                                                                                                                                                                                                                                                                         FLPB:0
## 6                                                                                                                                                                                                               (FLSG:0.000248367,((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792):0.000664825
## 7                                                                                                                                                                                                                                                                                                                                                                                                                 FLLG:0.0510187
## 8            (((FLKB:0.000474655,(FLSG:0.000248367,((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792):0.000664825):0.00127358,(FLFD:0.000926406,(FLSI:8.2722e-05,FLAB:0.0146583):0.00107509):0.00123607):0.0203382,(FLHB:0.000682617,(FLCC:0.00110869,FLLG:0.0510187):0.000504332):0.00107155):0
## 9                                                                                                                                                                                                                                                                                                                                                                                                               FLKB:0.000474655
## 10                                                                                          ((FLKB:0.000474655,(FLSG:0.000248367,((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792):0.000664825):0.00127358,(FLFD:0.000926406,(FLSI:8.2722e-05,FLAB:0.0146583):0.00107509):0.00123607):0.0203382
## 11                                                                                                                                                                                                                                                                                                                                                                                                                TXFW:0.0269586
## 12                                                                                                                                                                                                                                                                                                                                                                ((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786
## 13                                                                                                                                                                                                                                                                                                                                                                                                               ALFW:0.00569026
## 14                                                                                                                                                                                                                                                                                                                        (TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066
## 15                                                                                                                                                                                                                                                                                                                                                                                                                FLAB:0.0146583
## 16                                                                                                                                                                                                                                                                                                                                                                                                                          <NA>
## 17                                                                                                                                                                                                                                                                                                                                                                                                               TXSP:0.00121161
## 18                                                                                                                                                                                                                                                                                                                                                                                 (TXCC:0.000281411,TXSP:0.00121161):0.00205677
## 19                                                                                                                                                                                                                                                                                                                                                                                                              FLHB:0.000682617
## 20                                                                                                                                                                                                                                                                                                                                                    (FLHB:0.000682617,(FLCC:0.00110869,FLLG:0.0510187):0.000504332):0.00107155
## 21                                                                                                                                                                                                                                                                                                                                                                                                               FLSI:8.2722e-05
## 22                                                                                                                                                                                                                                                                                                                                                     (FLFD:0.000926406,(FLSI:8.2722e-05,FLAB:0.0146583):0.00107509):0.00123607
## 23                                                                                                                                                                                                                                                                                                                                                                                                                        ALST:0
## 24                                                                                                                                                                                                                                             ((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792
## 25                                                                                                                                                                                                                                                                                                                                                                                                               TXCB:0.00319305
## 26                                                                                                                                                                                                                                                                                                                                                    ((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619
## 27                                                                                                                                                                                                                                                                                                                                                                                                               FLCC:0.00110869
## 28                                                                                                                                                                                                                                                                                                                                                                                  (FLCC:0.00110869,FLLG:0.0510187):0.000504332
## 29                                                                                                                                                                                                                                                                                                                                                                                                                LAFW:0.0118029
## 30                                                                                                                                                                                                                                                                                                                                                                                    (LAFW:0.0118029,ALFW:0.00569026):0.0176978
## 31 (FLPB:0,(((FLKB:0.000474655,(FLSG:0.000248367,((TXFW:0.0269586,((TXCC:0.000281411,TXSP:0.00121161):0.00205677,TXCB:0.00319305):0.00396619):0.00747066,((LAFW:0.0118029,ALFW:0.00569026):0.0176978,ALST:0):0.00502786):0.0120792):0.000664825):0.00127358,(FLFD:0.000926406,(FLSI:8.2722e-05,FLAB:0.0146583):0.00107509):0.00123607):0.0203382,(FLHB:0.000682617,(FLCC:0.00110869,FLLG:0.0510187):0.000504332):0.00107155):0);
## 32                                                                                                                                                                                                                                                                                                                                                                                                                          <NA>
## 33                                                                                                                                                                                                                                                                                                                                                                                                                          <NA>
## 34                                                                                                                                                                                                                                                                                                                                                                                   (FLSI:8.2722e-05,FLAB:0.0146583):0.00107509
## 35                                                                                                                                                                                                                                                                                                                                                                                                                          <NA>
##              x       y   ymin   ymax
## 1  0.022525012 0.84375 0.8125 0.8750
## 2  0.021611820 0.87500 0.3750 0.9375
## 3  0.046415117 0.71875 0.6875 0.7500
## 4  0.022500716 0.34375 0.3125 0.3750
## 5  0.000000000 0.96875 0.9375 1.0000
## 6  0.022276645 0.81250 0.3750 0.8750
## 7  0.052594582 0.03125 0.0000 0.0625
## 8  0.000000000 0.18750 0.0000 0.9375
## 9  0.022086475 0.90625 0.8750 0.9375
## 10 0.020338240 0.37500 0.1875 0.9375
## 11 0.067069276 0.78125 0.7500 0.8125
## 12 0.037667946 0.43750 0.3750 0.5625
## 13 0.053147151 0.46875 0.4375 0.5000
## 14 0.040110746 0.75000 0.5625 0.8125
## 15 0.029966867 0.21875 0.1875 0.2500
## 16 0.011480800      NA 0.0000 0.1875
## 17 0.047345316 0.65625 0.6250 0.6875
## 18 0.046133706 0.68750 0.6250 0.7500
## 19 0.001754167 0.15625 0.1250 0.1875
## 20 0.001071550 0.12500 0.0000 0.1875
## 21 0.022732122 0.28125 0.2500 0.3125
## 22 0.021574310 0.31250 0.1875 0.3750
## 23 0.037667946 0.40625 0.3750 0.4375
## 24 0.032640086 0.56250 0.3750 0.8125
## 25 0.046647245 0.59375 0.5625 0.6250
## 26 0.044076936 0.62500 0.5625 0.7500
## 27 0.002684572 0.09375 0.0625 0.1250
## 28 0.001575882 0.06250 0.0000 0.1250
## 29 0.059259791 0.53125 0.5000 0.5625
## 30 0.047456891 0.50000 0.4375 0.5625
## 31 0.000000000 0.93750 0.0000 1.0000
## 32 0.064357446      NA 0.5625 0.7500
## 33 0.022581282      NA 0.0000 0.0625
## 34 0.022649400 0.25000 0.1875 0.3125
## 35 0.067069276      NA 0.5625 0.7500
## [1] "0.564492 0 0.02033824"
## [1] "0.899407 0.0401107462921379 0.0670692762921379"
## [1] "0.41172 0.001575882 0.052594582"
## [1] "0.899407 0.0401107462921379 0.0670692762921379"
##  [1] 0.022525012 0.046415117 0.022500716 0.000000000 0.052594582
##  [6] 0.022086475 0.067069276 0.053147151 0.029966867 0.047345316
## [11] 0.001754167 0.022732122 0.037667946 0.046647245 0.002684572
## [16] 0.059259791
## [1] 0.003
mtext("Treemix",3)
ybar<-0.01
mcols = rev( heat.colors(150) )
mcols = mcols[50:length(mcols)]
ymi = ybar+rect.start
yma = ybar+0.3
l = 0.2
w = l/100
xma = max(t3$d$x/20)
rect( rep(rect.start, 100), ymi+(0:99)*w, rep(rect.start+xma, 100), ymi+(1:100)*w, col = mcols, border = mcols)
text(rect.start+xma+0.001, ymi, lab = "0", adj = 0)
text(rect.start+xma+0.001, yma, lab = "0.5", adj = 0)
text(rect.start, yma+0.07, lab = "Migration", adj = 0 )
text(rect.start, yma+0.04, lab = "weight", adj = 0 )
dev.off()
## png 
##   2
Figure 4. Trees

Figure 4. Trees

Figure 5

I need to re-run populations because I don’t seem to have the correct fst files. I can use the whitelist and hopefully include bootstrapping and kernel smoothing:

populations -b 2 -W fwsw_results/subset.whitelist.txt -P fwsw_results/stacks -M fwsw_pops_map.txt --fstats --vcf_haplotypes --genomic --bootstrap -k

And then I will identify the shared outliers among them. But first, let’s define some names.

vcf<-parse.vcf("p4.upd.vcf")
vcf$SNP<-paste(vcf$`#CHROM`,vcf$POS,sep=".")
scaffs<-levels(as.factor(vcf[,1]))
scaffs[1:22]<-lgs
scaff.starts<-tapply(vcf$POS,vcf$`#CHROM`,max)
scaff.starts<-data.frame(rbind(cbind(names(scaff.starts),scaff.starts)),stringsAsFactors = F)
locus.info<-c(colnames(vcf[1:9]),"SNP")
dd.plot.name<-as.character("separate_delta-divergence.png")
dd.name<-as.character("sep.deltadivergence.txt")
sdd.name<-as.character("sep.smoothedDD.out.txt")
afs.plot.name<-as.character("p4.All_AFS.png")
stacks.sig.out<-"p4.stacks.sig.snps.txt"
annotations.name<-"p4.StacksFWSWOutliers_annotatedByGenome.csv"
bf.file<-"bayenv/p4.bf.txt"
fwsw.tx<-read.delim("stacks/p4.fst_TXCC-TXFW.txt")
fwsw.la<-read.delim("stacks/p4.fst_ALST-LAFW.txt")
fwsw.al<-read.delim("stacks/p4.fst_ALFW-ALST.txt")
fwsw.fl<-read.delim("stacks/p4.fst_FLCC-FLLG.txt")
fwsw<-read.delim("stacks/fw-sw_populations/batch_2.fst_marine-freshwater.tsv")
#Compare neighboring pops.

tx.sig<-fwsw.tx[fwsw.tx$Fisher.s.P<0.01,"Locus.ID"]
la.sig<-fwsw.la[fwsw.la$Fisher.s.P<0.01,"Locus.ID"]
al.sig<-fwsw.al[fwsw.al$Fisher.s.P<0.01,"Locus.ID"]
fl.sig<-fwsw.fl[fwsw.fl$Fisher.s.P<0.01,"Locus.ID"]
length(tx.sig[(tx.sig %in% c(la.sig,al.sig,fl.sig))])
length(la.sig[(la.sig %in% c(tx.sig,al.sig,fl.sig))])
length(al.sig[(al.sig %in% c(la.sig,tx.sig,fl.sig))])
all.shared<-fl.sig[fl.sig %in% la.sig & fl.sig %in% al.sig & fl.sig %in% tx.sig]
fw.shared.chr<-fwsw.tx[fwsw.tx$Locus.ID %in% all.shared,c("Locus.ID","Chr","BP","Column","Overall.Pi")]
tapply(fw.shared.chr$Locus.ID,factor(fw.shared.chr$Chr),function(x){ length(unique(x)) })
#are they using the same SNPs or different SNPs?
stacks.sig<-data.frame(nrow=nrow(fw.shared.chr),ncol=4)
for(i in 1:nrow(fw.shared.chr)){
  tx.bp<-fwsw.tx[fwsw.tx$Fisher.s.P<0.01 & fwsw.tx$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
  la.bp<-fwsw.la[fwsw.la$Fisher.s.P<0.01 & fwsw.la$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
  al.bp<-fwsw.al[fwsw.al$Fisher.s.P<0.01 & fwsw.al$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
  fl.bp<-fwsw.fl[fwsw.fl$Fisher.s.P<0.01 & fwsw.fl$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
  stacks.sig[i,1]<-paste(tx.bp,sep=",",collapse = ",")
  stacks.sig[i,2]<-paste(la.bp,sep=",",collapse = ",")
  stacks.sig[i,3]<-paste(al.bp,sep=",",collapse = ",")
  stacks.sig[i,4]<-paste(fl.bp,sep=",",collapse = ",")
}
colnames(stacks.sig)<-c("TX","LA","AL","FL")
stacks.sig<-data.frame(cbind(fw.shared.chr,stacks.sig))
stacks.sig$SNP<-paste(stacks.sig$Chr,stacks.sig$BP,sep=".")
write.table(stacks.sig,stacks.sig.out,sep='\t',row.names=FALSE,col.names=TRUE)
#+ annotations, eval=FALSE
gff<-read.delim(gzfile("../../scovelli_genome/ssc_2016_12_20_chromlevel.gff.gz"),header=F)
colnames(gff)<-c("seqname","source","feature","start","end","score","strand","frame","attribute")
genome.blast<-read.csv("../../scovelli_genome/ssc_2016_12_20_cds_nr_blast_results.csv",skip=1,header=T)#I saved it as a csv

#' extract the significant regions from the gff file
fw.sig.reg<-do.call(rbind,apply(fw.shared.chr,1,function(sig){
  this.gff<-gff[as.character(gff$seqname) %in% as.character(unlist(sig["Chr"])),]
  this.reg<-this.gff[this.gff$start <= as.numeric(sig["BP"]) & this.gff$end >= as.numeric(sig["BP"]),]
  if(nrow(this.reg) == 0){
    if(as.numeric(sig["BP"])>max(as.numeric(this.gff$end))){
      new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
                      region="beyond.last.contig", description=NA,SSCID=NA)
    }else{
      new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
                      region=NA,description=NA,SSCID=NA)
    }
  }else{
    if(length(grep("SSCG\\d+",this.reg$attribute))>0){
      geneID<-unique(gsub(".*(SSCG\\d+).*","\\1",this.reg$attribute[grep("SSCG\\d+",this.reg$attribute)]))
      gene<-genome.blast[genome.blast$sscv4_gene_ID %in% geneID,"blastp_hit_description"]
    }else{
      geneID<-NA
      gene<-NA
    }
    new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
                    region=paste(this.reg$feature,sep=",",collapse = ","),description=gene,SSCID=geneID)
  }
  return(as.data.frame(new))
}))
write.csv(fw.sig.reg,annotations.name)
fw.sig.reg<-read.csv(annotations.name)

There are 53 shared SNPs using the Fisher’s P as a cutoff.

The bayes factors and Fsts are all in the following chunk.

fwsw<-read.delim("stacks/fw-sw_populations/batch_2.fst_marine-freshwater.tsv")
#Compare neighboring pops.

tx.sig<-fwsw.tx[fwsw.tx$Fisher.s.P<0.01,"Locus.ID"]
la.sig<-fwsw.la[fwsw.la$Fisher.s.P<0.01,"Locus.ID"]
al.sig<-fwsw.al[fwsw.al$Fisher.s.P<0.01,"Locus.ID"]
fl.sig<-fwsw.fl[fwsw.fl$Fisher.s.P<0.01,"Locus.ID"]
length(tx.sig[(tx.sig %in% c(la.sig,al.sig,fl.sig))])
## [1] 962
length(la.sig[(la.sig %in% c(tx.sig,al.sig,fl.sig))])
## [1] 579
length(al.sig[(al.sig %in% c(la.sig,tx.sig,fl.sig))])
## [1] 679
all.shared<-fl.sig[fl.sig %in% la.sig & fl.sig %in% al.sig & fl.sig %in% tx.sig]
bf<-read.delim(bf.file)
bf.co<-apply(bf[,5:7],2,quantile,0.99) #focus on Bayes Factors, because of Lotterhos & Whitlock (2015)
sal.bf.sig<-bf[bf$Salinity_BF>bf.co["Salinity_BF"],c(1,2,4,8,9,6)]
stacks.sig<-read.delim(stacks.sig.out)

Figure 5 includes Fst data, Bayes Factors data, colors from Structure, and reciprocal monophyly data, plus smoothed Fst values (which I think I will leave out this round).

Plot Fig. 5

Now I’m ready to plot Figure 5.

fig5.name<-"p4_stacks_fsts_fwsw_bf.png"
addLines<-FALSE
addSmooth<-TRUE
#' Set up the plotting utilities
all.chr<-data.frame(Chr=c(as.character(fwsw.tx$Chr),as.character(fwsw.la$Chr),
                          as.character(fwsw.al$Chr),as.character(fwsw.fl$Chr),
                          as.character(bf$scaffold)),
  BP=c(fwsw.tx$BP,fwsw.la$BP,fwsw.al$BP,fwsw.fl$BP,bf$BP),stringsAsFactors = F)
bounds<-tapply(as.numeric(as.character(all.chr$BP)), all.chr$Chr,max)
bounds<-data.frame(Chrom=dimnames(bounds),End=bounds)
colnames(bounds)<-c("Chrom","End")
plot.scaffs<-scaffs[scaffs %in% bounds$Chr]
plot.scaffs[1:22]<-lgs
bounds<-bounds[match(plot.scaffs,bounds$Chrom),]
#generate info

fwsw.plot<-assign.plotpos(fwsw,plot.scaffs,bounds,df.bp="BP",df.chrom = "Chr")
addLines<-TRUE
addLines<-FALSE
addSmooth<-TRUE
#' plot with the outlier regions
#' Does NOT include monophyletic neighborjoining trees.
png(fig5.name,height=6,width=8,units="in",res=300)
par(mfrow=c(5,1),mar=c(0.85,2,0,0.5),oma=c(1,1,1,0.5))
#par(fig=c(0,1,0.9-0.9/5,0.9))

fwswt.fst<-fst.plot(fwsw.tx,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr", 
                    scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,pch=19,
                    pt.cols = c(grp.colors[1],grp.colors[2]),pt.cex=1,axis.size = 1)
if(addLines==TRUE){ perlg.add.lines(fwsw.plot,lgs) }
if(addSmooth==TRUE){ points(fwswt.fst$plot.pos,fwswt.fst$Smoothed.Fst,col="cornflowerblue",type="l") }
#points(fwswt.fst$BP,fwswt.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[1])
#points(fwswt.fst$plot.pos[fwswt.fst$Locus.ID %in% all.shared],fwswt.fst$Corrected.AMOVA.Fst[fwswt.fst$Locus.ID %in% all.shared],
#       pch=1,col="black",cex=1.3)
clip(0,max(fwswt.fst$plot.pos),0,1)
abline(v=fwswt.fst$plot.pos[fwswt.fst$Locus.ID %in% all.shared],col="gray47")
mtext("TXFW vs. TXCC",2,cex=0.75)#,line=-1)
labs<-tapply(fwswt.fst$plot.pos,fwswt.fst$Chr,median)
text(x=labs[lgs],y=-0.1,labels=lgn,xpd=TRUE)

fwswl.fst<-fst.plot(fwsw.la,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr", 
                    scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,pch=19,
                    pt.cols=c(grp.colors[2],grp.colors[3]),pt.cex=1,axis.size=1)
if(addLines==TRUE){ perlg.add.lines(fwsw.plot,lgs) }
if(addSmooth==TRUE){ points(fwswl.fst$plot.pos,fwswl.fst$Smoothed.Fst,col="cornflowerblue",type="l") }
#points(fwswl.fst$BP,fwswl.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[3])
#points(fwswl.fst$plot.pos[fwswl.fst$Locus.ID %in% all.shared],fwswl.fst$Corrected.AMOVA.Fst[fwswl.fst$Locus.ID %in% all.shared],
#       pch=1,col="black",cex=1.3)
clip(0,max(fwswl.fst$plot.pos),0,1)
abline(v=fwswl.fst$plot.pos[fwswl.fst$Locus.ID %in% all.shared],col="gray47")
mtext("LAFW vs. ALST",2,cex=0.75)#,line=-1)
labs<-tapply(fwswl.fst$plot.pos,fwswl.fst$Chr,median)
text(x=labs[lgs],y=-0.1,labels=lgn,xpd=TRUE)

fwswa.fst<-fst.plot(fwsw.al,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr", 
                    scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,pch=19,
                    pt.cols=c(grp.colors[3],grp.colors[2]),pt.cex=1,axis.size = 1)
if(addLines==TRUE){ perlg.add.lines(fwsw.plot,lgs) }
if(addSmooth==TRUE){ points(fwswa.fst$plot.pos,fwswa.fst$Smoothed.Fst,col="cornflowerblue",type="l") }
#points(fwswa.fst$BP,fwswa.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[4])
#points(fwswa.fst$plot.pos[fwswa.fst$Locus.ID %in% all.shared],fwswa.fst$Corrected.AMOVA.Fst[fwswa.fst$Locus.ID %in% all.shared],
#       pch=1,col="black",cex=1.3)
clip(0,max(fwswa.fst$plot.pos),0,1)
abline(v=fwswa.fst$plot.pos[fwswa.fst$Locus.ID %in% all.shared],col="gray47")
mtext("ALFW vs. ALST",2,cex=0.75)#,line=-1)
labs<-tapply(fwswa.fst$plot.pos,fwswa.fst$Chr,median)
text(x=labs[lgs],y=-0.1,labels=lgn,xpd=TRUE)

fwswf.fst<-fst.plot(fwsw.fl,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr", 
                    scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,pch=19,
                    pt.cols=c(grp.colors[6],grp.colors[5]),pt.cex=1,axis.size=1)
if(addLines==TRUE){ perlg.add.lines(fwsw.plot,lgs) }
if(addSmooth==TRUE){ points(fwswf.fst$plot.pos,fwswf.fst$Smoothed.Fst,col="cornflowerblue",type="l") }
#points(fwswf.fst$BP,fwswf.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[6])
#points(fwswf.fst$plot.pos[fwswf.fst$Locus.ID %in% all.shared],fwswf.fst$Corrected.AMOVA.Fst[fwswf.fst$Locus.ID %in% all.shared],
#       pch=1,col="black",cex=1.3)
clip(0,max(fwswf.fst$plot.pos),0,1)
abline(v=fwswf.fst$plot.pos[fwswf.fst$Locus.ID %in% all.shared],col="gray47")
mtext("FLFW vs. FLCC",2,cex=0.75)#,line=-1)
mtext(expression(bold(italic(F)[ST])),2,outer=T,line=-1,cex=0.75)
labs<-tapply(fwswf.fst$plot.pos,fwswf.fst$Chr,median)
text(x=labs[lgs],y=-0.1,labels=lgn,xpd=TRUE)
#BF
bs.sal<-fst.plot(bf,fst.name="logSal",chrom.name="scaffold",bp.name = "BP",scaffold.widths=bounds,
                 scaffs.to.plot = plot.scaffs,pch=19,axis.size = 1,pt.cex = 1)
points(bs.sal[bs.sal$SNP %in% sal.bf.sig$SNP,"plot.pos"],
       bs.sal[bs.sal$SNP %in% sal.bf.sig$SNP,"logSal"],
       col="cornflowerblue",pch=19)#give the outliers a color
clip(0,max(bs.sal$plot.pos),-3,241)
abline(v=fwswf.fst$plot.pos[fwswf.fst$Locus.ID %in% all.shared],col="gray47")
mtext("log(Salinity BF)",2,cex=0.75,line=2.1)
labs<-tapply(bs.sal$plot.pos,bs.sal$scaffold,median)
text(x=labs[lgs],y=-5,labels=lgn,xpd=TRUE)
#points(bs.sal[bs.sal$SNP %in% stacks.sig$SNP,"plot.pos"],
#       bs.sal[bs.sal$SNP %in% stacks.sig$SNP,"logSal"],
#       col="cornflowerblue",cex=1.3)
#clip(min(bs.sal$plot.pos),max(bs.sal$plot.pos),
#     min(bs.sal$logSal),max(bs.sal$logSal))
#abline(h=log(bf.co["Salinity_BF"]),col="cornflowerblue",lwd=2)


dev.off()
## png 
##   2
Figure 5. Fsts

Figure 5. Fsts

There seems to be an overabundance of shared outliers on LG4. I can test this using the multinomial distribution.

if(!("stacks.sig" %in% ls())){
  stacks.sig<-read.delim("p4.stacks.sig.snps.txt")
} 
if(is.null(stacks.sig$SNP)){ #make sure the SNP column is there.
  stacks.sig$SNP<-paste(stacks.sig$Chr,as.numeric(stacks.sig$BP)+1,sep=".")
}
sig.chrom<-stacks.sig[stacks.sig$Chr %in% lgs,]
sig.chrom$Chr<-factor(sig.chrom$Chr)
nloci<-nrow(sig.chrom)
nchrom<-length(lgs)
exp.perchrom<-rep(nloci/length(lgs),length(lgs))
names(exp.perchrom)<-lgs
exp.perchrom
##      LG1      LG2      LG3      LG4      LG5      LG6      LG7      LG8 
## 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364 
##      LG9     LG10     LG11     LG12     LG13     LG14     LG15     LG16 
## 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364 
##     LG17     LG18     LG19     LG20     LG21     LG22 
## 2.136364 2.136364 2.136364 2.136364 2.136364 2.136364
obs.perchrom<-tapply(sig.chrom$Locus.ID,sig.chrom$Chr,length)[lgs]
names(obs.perchrom)<-lgs
obs.perchrom[is.na(obs.perchrom)]<-0
obs.perchrom
##  LG1  LG2  LG3  LG4  LG5  LG6  LG7  LG8  LG9 LG10 LG11 LG12 LG13 LG14 LG15 
##    5    4    1   14    0    3    0    0    1    1    0    5    1    6    0 
## LG16 LG17 LG18 LG19 LG20 LG21 LG22 
##    0    0    2    1    3    0    0
dmultinom(x = obs.perchrom,prob = exp.perchrom)
## [1] 1.333996e-25

I can also look for overlapping loci in pairwise saltwater-saltwater comparisons.

swsw.name<-"p4_stacks_swsw.png"
swsw.tx<-read.delim("stacks/p4.fst_TXCB-TXCC.txt")
swsw.al<-read.delim("stacks/p4.fst_ALST-FLSG.txt")
swsw.fl<-read.delim("stacks/p4.fst_FLCC-FLHB.txt")
###### SW-SW neighbors ######

tx.sw.sig<-swsw.tx[swsw.tx$Fisher.s.P<0.01,"Locus.ID"]
al.sw.sig<-swsw.al[swsw.al$Fisher.s.P<0.01,"Locus.ID"]
fl.sw.sig<-swsw.fl[swsw.fl$Fisher.s.P<0.01,"Locus.ID"]
length(tx.sw.sig[(tx.sw.sig %in% c(al.sw.sig,fl.sw.sig))])
## [1] 106
length(al.sw.sig[(al.sw.sig %in% c(tx.sw.sig,fl.sw.sig))])
## [1] 135
length(fl.sw.sig[(fl.sw.sig %in% c(tx.sw.sig,al.sw.sig))])
## [1] 35
sw.shared<-fl.sw.sig[fl.sw.sig %in% al.sw.sig & fl.sw.sig %in% tx.sw.sig]

png(swsw.name,height=6,width=7.5,units="in",res=300)
par(mfrow=c(3,1),mar=c(0.85,2,0,0.5),oma=c(1,1,1,0.5))
swswt.fst<-fst.plot(swsw.tx,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr", 
                    scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,axis.size = 1,
                    pt.cols=c(grp.colors[1],grp.colors[2]),pt.cex = 1,pch=19)
mtext("TXCC vs. TXCB",3,line=-1,cex=0.75)
swswa.fst<-fst.plot(swsw.al,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr", y.lim=c(0,1),
                    scaffs.to.plot=plot.scaffs, scaffold.widths = bounds,axis.size = 1,
                    pt.cols=c(grp.colors[2],grp.colors[3]),pt.cex = 1,pch=19)
#points(swswa.fst$plot.pos,swswa.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[3])
mtext("ALST vs. FLSG",3,line=-1,cex=0.75)
swswf.fst<-fst.plot(swsw.fl,fst.name = "Corrected.AMOVA.Fst", bp.name = "BP",chrom.name = "Chr", 
                    scaffs.to.plot=plot.scaffs, y.lim = c(0,1),scaffold.widths = bounds,axis.size = 1,
                    pt.cols=c(grp.colors[6],grp.colors[5]),pt.cex = 1,pch=19)
#points(swswf.fst$plot.pos,swswf.fst$Corrected.AMOVA.Fst,pch=21,bg=grp.colors[6])
mtext("FLCC vs. FLHB",3,line=-1,cex=0.75)
mtext(expression(italic(F)[ST]),2,outer=T,line=-0.75,cex=0.75)
last<-0
for(i in 1:length(lgs)){
  text(x=mean(swswf.fst[swswf.fst$Chr ==lgs[i],"plot.pos"]),y=-0.07,
       labels=lgn[i], adj=1, xpd=TRUE)
  last<-max(swswf.fst[swswf.fst$Chr ==lgs[i],"plot.pos"])
}
dev.off()
## png 
##   2
SWSW Fsts

SWSW Fsts

Diversity statistics

In Figure 6, we show different measures of diversity: nucleotide diversity (\(\pi\)), heterozygosity (H), Jost’s D, and \(\delta\)-divergence. Additionally, we include the shared Fst outliers and putative loci, and highlight regions with high \(\pi\) and H.

Allele Frequency Spectrum

#' Calculate AFS from vcf
fw.afs<-lapply(fw.list,function(pop){
  this.vcf<-cbind(vcf[,locus.info],vcf[,grep(pop,colnames(vcf))])
  this.afs<-do.call(rbind,apply(this.vcf,1,calc.afs.vcf))
})
names(fw.afs)<-c("TXFW","LAFW","ALFW","FLFW")
sw.afs<-lapply(sw.list,function(pop){
  this.vcf<-cbind(vcf[,locus.info],vcf[,grep(pop,colnames(vcf))])
  this.afs<-do.call(rbind,apply(this.vcf,1,calc.afs.vcf))
})
names(sw.afs)<-sw.list
all.afs<-c(fw.afs,sw.afs)
Figure S1. Fsts

Figure S1. Fsts

Pi

pi.file.name<-"p4.pi.txt"
avgpi.file.name<-"p4.avgpi.txt"
all.pi<-data.frame(Chrom=vcf$`#CHROM`,Pos=vcf$POS,Pi=unlist(apply(vcf,1,calc.pi)))
all.pi$SNP<-paste(all.pi$Chrom,as.numeric(as.character(all.pi$Pos)),sep=".")
write.table(all.pi,pi.file.name,col.names = TRUE,row.names=FALSE, quote=FALSE,sep='\t')
all.pi<-read.table(pi.file.name,header=T)
avg.pi.adj<-read.table(avgpi.file.name,header=T)

Heterozygosity

all.het.name<-"p4.avg.het.txt"
avg.het.adj.name<-"p4.avg.het.adj.txt"
#het
avg.het<-do.call("rbind",sliding.window(vcf,lgs,stat="het",width=10))
avg.het.adj<-fst.plot(avg.het,scaffold.widths=scaff.starts,pch=19,
                     fst.name = "Avg.Stat",chrom.name = "Chr",bp.name = "Avg.Pos")
all.het<-data.frame(Chrom=vcf$`#CHROM`,Pos=vcf$POS,Het=unlist(apply(vcf,1,calc.het)))
all.het$SNP<-paste(all.het$Chrom,as.numeric(as.character(all.het$Pos)),sep=".")
write.table(avg.het.adj,avg.het.adj.name,sep='\t',col.names=TRUE)
write.table(all.het,all.het.name,sep='\t',col.names=TRUE)
avg.het.adj<-read.delim(avg.het.adj.name,header=TRUE)
all.het<-read.delim(all.het.name,header=TRUE)

delta-divergence

swfw.mu<-calc.mean.fst(vcf = vcf,pop.list1 = sw.list,pop.list2 = fw.list,maf.cutoff=0.01)
fwfw.mu<-calc.mean.fst(vcf = vcf,pop.list1 = fw.list,pop.list2 = fw.list, maf.cutoff=0.01)
deltad<-merge(swfw.mu,fwfw.mu,by="SNP")
 deltad<-deltad[,c("SNP","Chrom.x","Pos.x","Mean.Fst.x","Mean.Fst.y")]
colnames(deltad)<-c("SNP","Chrom","Pos","MeanSWFW.Fst","MeanFWFW.Fst")
deltad$deltad<-deltad$MeanSWFW.Fst - deltad$MeanFWFW.Fst
deltad<-deltad[!is.na(deltad$deltad),]#remove NAs
write.table(deltad, dd.name,sep='\t',col.names = TRUE,row.names = FALSE)
#' ```

This plotting function also generates the smoothed \(\delta\)-divergence values (which are not saved) and identifies the outliers (in 99% and 1% quantiles, and which are saved in sdd.out).

Jost’s D

First, I must calculate Jost’s D on each locus in the p4 dataset

sub.genind<-read.structure("stacks/subset.structure.stru",n.ind=698,
                           n.loc=9638,col.lab=1,col.pop=2,sep='\t',
                           row.marknames = 2,onerowperind=FALSE,ask=FALSE)
sub.genind@pop<-factor(gsub("sample_(\\w{4}).*","\\1",rownames(sub.genind@tab)))
jostd<-D_Jost(sub.genind) 
write.table(jostd$per.locus,"p4.jostd.perlocus.txt",sep='\t',col.names=FALSE,row.names = TRUE,quote=F)

Now this is done, so I don’t need to evaluate it again – I’ll just need to read it in from a file when I want to use it.

jostd<-read.delim(jostd.name,header=F)
colnames(jostd)<-c("locid","D")
jostd$SNP<-vcf$SNP
jostd$POS<-vcf$POS
jostd$Chr<-vcf$`#CHROM`
jostd$ID<-vcf$ID

Smooth the statistics

smoothed.name<-"p4_deltad_pi_het.png"
#assign the plotting positions relative to all loci
stacks.sig<-assign.plotpos(stacks.sig,plot.scaffs = plot.scaffs,bounds=bounds,df.chrom="Chr",df.bp="BP")
dd<-assign.plotpos(deltad,plot.scaffs = plot.scaffs,bounds=bounds,df.chrom="Chrom",df.bp="Pos") 
pi<-assign.plotpos(all.pi,plot.scaffs = plot.scaffs,bounds=bounds,df.chrom="Chrom",df.bp="Pos") 
ht<-assign.plotpos(all.het,plot.scaffs = plot.scaffs,bounds=bounds,df.chrom="Chrom",df.bp="Pos") 
jd<-assign.plotpos(jostd,plot.scaffs = plot.scaffs,bounds=bounds,df.chrom="Chr",df.bp="POS") 
colnames(jd)<-c("locid","D","SNP","Pos","Chrom","ID","plot.pos") #standardize the names

smooth.per.chrom<-function(df,lgs,df.chrom,df.bp,df.stat,out.name,...){#span=0.1,degree=2
  smooth.all<-data.frame(stringsAsFactors = FALSE) #all smoothed values
  smooth.low<-data.frame(stringsAsFactors = FALSE) #lower outliers
  smooth.upp<-data.frame(stringsAsFactors = FALSE) #upper outliers
  for(i in 1:length(lgs)){
    this.chrom<-df[df[,df.chrom] %in% lgs[i],]
    this.smooth<-loess.smooth(this.chrom[,df.bp],this.chrom[,df.stat],...) 
    this.smooth<-data.frame(chr=cbind(rep(as.character(lgs[i]),length(this.smooth$x)),
                                  pos=as.numeric(this.smooth$x),
                                  smoothed.stats=as.numeric(this.smooth$y)),stringsAsFactors = FALSE)
    this.upp<-this.chrom[this.chrom[df.stat]>=quantile(this.chrom[,df.stat],0.99),]#choosing the upper outliers
    this.low<-this.chrom[this.chrom[,df.stat]<=quantile(this.chrom[,df.stat],0.01),]#choosing the lower outliers
                   
    # save the data
    smooth.all<-data.frame(rbind(smooth.all,this.smooth))
    smooth.low<-rbind(smooth.low,this.low)
    smooth.upp<-rbind(smooth.upp,this.upp)
  }
   
  smooth.upp$direction<-"upper" 
  smooth.low$direction<-"lower"
  smooth.out<-rbind(smooth.low,smooth.upp) # put the outliers in one df
  colnames(smooth.all)<-c("chr","pos","smoothed.stats")
  smooth.all$pos<-as.numeric(as.character(smooth.all$pos))
  smooth.all$smoothed.stats<-as.numeric(as.character(smooth.all$smoothed.stats))
  # save to files
  write.table(smooth.out,paste(out.name,"outliers.txt",sep=""),col.names=T,row.names=F,quote=F,sep='\t')
  write.table(smooth.all,paste(out.name,"smoothed.txt",sep=""),col.names=T,row.names=F,quote=F,sep='\t')
  smoothed<-list(smooth.out,smooth.all)
}

#smooth the statistics
dd.bp.smooth<-smooth.per.chrom(dd,lgs,df.chrom="Chrom",df.bp="Pos",df.stat="deltad",out.name="dd.bp",span=.1,degree=2) 
pi.bp.smooth<-smooth.per.chrom(pi,lgs,df.chrom="Chrom",df.bp="Pos",df.stat="Pi",out.name="pi.bp",span=0.1,degree=2) 
ht.bp.smooth<-smooth.per.chrom(ht,lgs,df.chrom="Chrom",df.bp="Pos",df.stat="Het",out.name="ht.bp",span=0.1,degree=2)
jd.bp.smooth<-smooth.per.chrom(jd,lgs,df.chrom="Chrom",df.bp="Pos",df.stat="D",out.name="jd.bp",span=0.1,degree=2)
#smooth the statistics
dd.pp.smooth<-smooth.per.chrom(dd,lgs,df.chrom="Chrom",df.bp="plot.pos",df.stat="deltad",out.name="dd",span=0.1,degree=2)
pi.pp.smooth<-smooth.per.chrom(pi,lgs,df.chrom="Chrom",df.bp="plot.pos",df.stat="Pi",out.name="pi",span=0.1,degree=2) 
ht.pp.smooth<-smooth.per.chrom(ht,lgs,df.chrom="Chrom",df.bp="plot.pos",df.stat="Het",out.name="ht",span=0.1,degree=2) 
jd.pp.smooth<-smooth.per.chrom(jd,lgs,df.chrom="Chrom",df.bp="plot.pos",df.stat="D",out.name="jd",span=0.1,degree=2) 
comp.col<-c(Het="#80cdc1",pi="#018571",Fst="black",D="#a6611a",deltad="#dfc27d")
plot.smooth.stats<-function(df,smooth,stat.name,color,ylabel,...){
  dd<-fst.plot(fst.dat = df,...)
  mtext(ylabel,2,line=1.5,cex=0.75)
  points(x=as.numeric(as.character(smooth[[2]]$pos)),
         y=as.numeric(as.character(smooth[[2]]$smoothed.stats)),col=color,type="l",lwd=2)
  points(x=as.numeric(smooth[[2]]$pos),y=as.numeric(smooth[[2]]$smoothed.stats),col=color,type="l",lwd=2)
  points(smooth[[1]]$plot.pos[smooth[[1]]$direction=="upper"],smooth[[1]][smooth[[1]]$direction=="upper",stat.name],
         pch=24,bg=color,col=color,cex=0.5) #add  outliers
  points(smooth[[1]]$plot.pos[smooth[[1]]$direction=="lower"],smooth[[1]][smooth[[1]]$direction=="lower",stat.name],
         pch=25,bg=color,col=color,cex=0.5)
}
xlab.lgs<-function(df,lgs,lgn,chr,bp,...){
  last<-0
  for(i in 1:length(lgs)){
    text(x=median(df[df[,chr] ==lgs[i],bp]),labels=lgn[i],...)
    last<-max(df[df[,chr]  ==lgs[i],bp])
  }
}
png(smoothed.name,height=7,width=5,units="in",res=300)
par(mfrow=c(4,1),mar=c(1,1,1,1),oma=c(1,2,1,1),cex=0.75)
t<-plot.smooth.stats(df=dd,smooth=dd.pp.smooth,stat.name="deltad",
                     color=comp.col["deltad"],fst.name="deltad",bp.name="Pos",
                  ylabel=expression(paste(delta,"-divergence")),y.lim=c(-0.5,1),
                  axis=0.75,scaffold.widths=scaff.starts,pch=19,scaffs.to.plot = scaffs)
xlab.lgs(dd,lgs,lgn,"Chrom","plot.pos",adj=1, xpd=TRUE,y=-0.5,cex=0.75)
#plot Jost's D
d<-plot.smooth.stats(df=jd,smooth=jd.pp.smooth,stat.name="D",
                     color=comp.col["D"],fst.name="D",chrom.name = "Chrom",bp.name="Pos",
                  ylabel=expression("Jost's"~italic(D)),
                  axis=0.75,scaffold.widths=scaff.starts,pch=19,scaffs.to.plot = scaffs)
xlab.lgs(jd,lgs,lgn,"Chrom","plot.pos",adj=1, xpd=TRUE,y=-0.075,cex=0.75)
#plot pi
p<-plot.smooth.stats(df=pi,smooth=pi.pp.smooth,stat.name="Pi",
                     color=comp.col["pi"],fst.name="Pi",chrom.name = "Chrom",bp.name="Pos",
                  ylabel=expression(pi),y.lim=c(0,0.5),
                  axis=0.75,scaffold.widths=scaff.starts,pch=19,scaffs.to.plot = scaffs)
xlab.lgs(pi,lgs,lgn,"Chrom","plot.pos",adj=1, xpd=TRUE,y=-0.05,cex=0.75)
#abline(v=stacks.sig$plot.pos,col="grey10") #stacks
#plot heterozygosity
h<-plot.smooth.stats(df=ht,smooth=ht.pp.smooth,stat.name="Het",
                     color=comp.col["Het"],fst.name="Het",chrom.name = "Chrom",bp.name="Pos",
                  ylabel=expression(italic(H)),
                  axis=0.75,scaffold.widths=scaff.starts,pch=19,scaffs.to.plot = scaffs)
xlab.lgs(ht,lgs,lgn,"Chrom","plot.pos",adj=1, xpd=TRUE,y=-0.05,cex=0.75)



dev.off()
Figure x. Smoothed variables

Figure x. Smoothed variables

bf$SNP<-paste(bf$scaffold,as.numeric(as.character(bf$BP))+1,sep=".")
bf.co<-apply(bf[,5:7],2,quantile,0.99) #focus on Bayes Factors, because of Lotterhos & Whitlock (2015)
sal.bf.sig<-bf[bf$Salinity_BF>bf.co["Salinity_BF"],c(1,2,4,8,6,9)]
all.out<-data.frame(rbind(cbind(paste(fw.sig.reg$Chr,fw.sig.reg$BP,sep="."),"fst"),
                          cbind(dd.bp.smooth[[1]]$SNP,"deltad"),
                          cbind(ht.bp.smooth[[1]]$SNP,"het"),
                          cbind(jd.bp.smooth[[1]]$SNP,"jostd"),
                          cbind(pi.bp.smooth[[1]]$SNP,"pi"),
                          cbind(sal.bf.sig$SNP,"bayenv_salinity")),stringsAsFactors = FALSE)
colnames(all.out)<-c("SNP","test")
write.table(all.out,"all_outliers.txt",sep='\t',quote=FALSE,col.names=TRUE,row.names=FALSE)

Use putative genes

Here, I compare the Fst outliers and \(\delta\)-divergence outliers to putative genes identified from other studies of teleosts adapting to freshwater environments.

#putative genes
put.genes<-read.delim("putative_genes.txt",header=TRUE,sep='\t')
#ld info
ld<-read.delim("p4.ld_info_fwsw.txt")

gff.name<-"ssc_2016_12_20_chromlevel.gff.gz"
#genome annotations
if(length(grep("gz",gff.name))>0){
  gff<-read.delim(gzfile(paste("../../scovelli_genome/",gff.name,sep="")),header=F)
} else{
  gff<-read.delim(paste("../../scovelli_genome/",gff.name,sep=""),header=F)
}
colnames(gff)<-c("seqname","source","feature","start","end","score","strand","frame","attribute")
genome.blast<-read.csv("../../scovelli_genome/ssc_2016_12_20_cds_nr_blast_results.csv",skip=1,header=T)#I saved it as a csv

Define some functions

These are updated from fwsw_analysis.R and are specific to these analyses, and are not generalizable!

ssc.fwsw.fstsig<-function(tx, la, al, fl, cutoff)
{
  tx.sig<-tx[tx$Fisher.s.P<cutoff,"Locus.ID"]
  la.sig<-la[la$Fisher.s.P<cutoff,"Locus.ID"]
  al.sig<-al[al$Fisher.s.P<cutoff,"Locus.ID"]
  fl.sig<-fl[fl$Fisher.s.P<cutoff,"Locus.ID"]
  length(tx.sig[(tx.sig %in% c(la.sig,al.sig,fl.sig))])
  length(la.sig[(la.sig %in% c(tx.sig,al.sig,fl.sig))])
  length(al.sig[(al.sig %in% c(la.sig,tx.sig,fl.sig))])
  all.shared<-fl.sig[fl.sig %in% la.sig & fl.sig %in% al.sig & fl.sig %in% tx.sig]
  fw.shared.chr<-fwsw.tx[fwsw.tx$Locus.ID %in% all.shared,c("Locus.ID","Chr","BP","Column","Overall.Pi")]
  tapply(fw.shared.chr$Locus.ID,factor(fw.shared.chr$Chr),function(x){ length(unique(x)) })
  #are they using the same SNPs or different SNPs?
  stacks.sig<-data.frame(nrow=nrow(fw.shared.chr),ncol=4)
  for(i in 1:nrow(fw.shared.chr)){
    tx.bp<-fwsw.tx[tx$Fisher.s.P<cutoff & tx$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
    la.bp<-fwsw.la[la$Fisher.s.P<cutoff & la$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
    al.bp<-fwsw.al[al$Fisher.s.P<cutoff & al$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
    fl.bp<-fwsw.fl[fl$Fisher.s.P<cutoff & fl$Locus.ID == fw.shared.chr[i,"Locus.ID"],"BP"]
    stacks.sig[i,1]<-paste(tx.bp,sep=",",collapse = ",")
    stacks.sig[i,2]<-paste(la.bp,sep=",",collapse = ",")
    stacks.sig[i,3]<-paste(al.bp,sep=",",collapse = ",")
    stacks.sig[i,4]<-paste(fl.bp,sep=",",collapse = ",")
  }
  colnames(stacks.sig)<-c("TX","LA","AL","FL")
  stacks.sig<-data.frame(cbind(fw.shared.chr,stacks.sig))
  stacks.sig$SNP<-paste(stacks.sig$Chr,stacks.sig$BP,sep=".")
  return(stacks.sig)
}

#' extract the significant regions from the gff file
sig.region.ann<-function(fw.shared.chr,gff,genome.blast)
{
  fw.sig.reg<-do.call(rbind,apply(fw.shared.chr,1,function(sig){
    this.gff<-gff[as.character(gff$seqname) %in% as.character(unlist(sig["Chr"])),]
    this.reg<-this.gff[this.gff$start <= as.numeric(sig["BP"]) & this.gff$end >= as.numeric(sig["BP"]),]
    if(nrow(this.reg) == 0){
      if(as.numeric(sig["BP"])>max(as.numeric(this.gff$end))){
        new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
                        region="beyond.last.contig", description=NA,SSCID=NA)
      }else{
        new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
                        region=NA,description=NA,SSCID=NA)
      }
    }else{
      if(length(grep("SSCG\\d+",this.reg$attribute))>0){
        geneID<-unique(gsub(".*(SSCG\\d+).*","\\1",this.reg$attribute[grep("SSCG\\d+",this.reg$attribute)]))
        gene<-genome.blast[genome.blast$sscv4_gene_ID %in% geneID,"blastp_hit_description"]
      }else{
        geneID<-NA
        gene<-NA
      }
      new<-data.frame(Locus=sig["Locus.ID"],Chr=sig["Chr"],BP=sig["BP"],SNPCol=sig["Column"],
                      region=paste(this.reg$feature,sep=",",collapse = ","),description=gene,SSCID=geneID)
    }
    return(as.data.frame(new))
  }))
}

Combine putative genes and genomic info

#' extract the significant regions from the gff file
put.reg<-do.call(rbind,apply(put.genes,1,function(gene){
  ssc.gene<-as.character(gene[[6]])
  #there might be multiple matches
  ssc.genes<-unlist(strsplit(ssc.gene,","))
  #for each gene, match to gff
  gene.info<-do.call(rbind,lapply(ssc.genes,function(genename){
    this.gff<-gff[grep(genename,gff$attribute),]
    chrom<-unique(as.character(this.gff$seqname))
    start<-min(this.gff$start)
    stop<-max(this.gff$end)
    return(c(chrom,start,stop))
  }))
  if(as.character(gene[[3]]) == "") { gene[[3]]<-NA }
  new<-data.frame(Gene=rep(gene[[1]],length(gene.info[,1])),
                  Function=rep(gene[[2]],length(gene.info[,1])),
                  Chromsome=rep(gene[[3]],length(gene.info[,1])),
                  Species=rep(gene[[4]],length(gene.info[,1])),
                  Citation=rep(gene[[5]],length(gene.info[,1])),
                  Scovelli_geneID=rep(gene[[6]],length(gene.info[,1])),
                  Notes=rep(gene[[7]],length(gene.info[,1])),
                  Chrom=gene.info[,1],StartBP=gene.info[,2],StopBP=gene.info[,3],
                  stringsAsFactors = FALSE)
  return(as.data.frame(new))
}))
write.table(put.reg,"putative.gene.regions.tsv",col.names=T,sep='\t')

Note that some genes will have multiple rows as they match multiple locations in the genome.

Do the putative genes contain RAD loci?

put.reg$rad.loci<-apply(put.reg,1,function(gene){
  rad<-vcf[vcf$`#CHROM` %in% gene[["Chrom"]] & 
           vcf$POS >= as.numeric(as.character(gene[["StartBP"]])) & 
           vcf$POS <= as.numeric(as.character(gene[["StopBP"]])),]  
  if(nrow(rad)>0){ rad.snps<-paste(rad$SNP,sep=",",collapse=",") }
  else { rad.snps<-NA }
  return(rad.snps)
})

About 48.1818182% of the putative genes have RAD loci within them - but only about 22.037037% of all of the annotations have RAD loci.

Compare the Stacks outliers with putative genes

What is the maximum distance of loci in LD?

\(D'\) was calculated using a custom C++ program (found at https://github.com/spflanagan/SCA/tree/master/programs/pairwise_ld_vcf) using the formula:

\[D' = \sum_{i=1}^m\sum_{j=1}^n \frac{p_iq_j|D_{ij}|}{D_{max}},\] where \(m\) is the number of alleles at locus and \(n\) is the number of alleles at locus B. \(D_{ij}\) was calculated as \(f_{ij}-p_iq_j\), where \(f_{ij}\) is the frequency of the \(A_iB_j\) haplotype, \(p_i\) is the frequency of allele \(i\), and \(q_j\) is the frequency of allele \(j\). \(D_{max}\) is the lesser of \(p_iq_j\) or \((1-p_i)(1-q_j)\) when \(D_{ij}\) < 0 and is the minimum of (1 - \(p_{i}\))\(q_{i}\) or \(p_i(1-q_i)\) when \(D_{ij}\) > 0.

In the data.frame ld, the locus IDs (LocIDA and LocIDB) are the positions on the chromosome.

ld$pos.diff<-abs(ld$LocIDA - ld$LocIDB)
chrom.ld<-tapply(ld[ld$D.>0.5,"pos.diff"],ld[ld$D. >0.5,"Chrom"],mean)

Functions for comparison

#' Find outliers in a putative gene region
outlier.in.region<-function(outlier.df, region.df, oChrom="Chrom", oBP="BP",
                            rChrom="Chrom",rBPStart="StartBP",rBPStop="StopBP"){
  out.in<-apply(region.df,1,function(put.gene){
    oin<-outlier.df[outlier.df[[oChrom]] %in% put.gene[[rChrom]] 
      & outlier.df[[oBP]] >= as.numeric(as.character(put.gene[[rBPStart]]))
      & outlier.df[[oBP]] <= as.numeric(as.character(put.gene[[rBPStop]])),"SNP"]
    if(length(oin)==0){ oin<-NA }
    return(paste(oin,sep=",",collapse=","))
  })
  return(out.in)
}
outlier.nearby<-function(outlier.df,region.df,ld.df,
                         oChrom="Chrom", oBP="BP",
                         rChrom="Chrom",rBPStart="StartBP",rBPStop="StopBP"){
  out.nearby<-apply(region.df,1,function(put.gene){
    nearby<-outlier.df[outlier.df[[oChrom]] %in% put.gene[[rChrom]] 
      & outlier.df[[oBP]] >= (as.numeric(as.character(put.gene[[rBPStart]])) - (ld.df[put.gene[[rChrom]] ]))
      & outlier.df[[oBP]] <= (as.numeric(as.character(put.gene[[rBPStop]])) + (ld.df[put.gene[[rChrom]] ])),"SNP"]
    if(length(nearby)==0){ nearby<-NA }
    return(paste(nearby,sep=",",collapse=","))
  })
}

Prep regions for plotting

#' Get the plotting positions for the putative genes
all.chr<-data.frame(Chr=vcf$`#CHROM`,BP=vcf$POS,stringsAsFactors = F)
bounds<-data.frame(levels(as.factor(all.chr$Chr)),tapply(as.numeric(as.character(all.chr$BP)),all.chr$Chr,max))
colnames(bounds)<-c("Chrom","End")
plot.scaffs<-scaffs[scaffs %in% bounds$Chrom]

#convert put.reg to the plotting BP
new.dat<-data.frame(stringsAsFactors = F)
last.max<-0
for(i in 1:length(plot.scaffs)){
  #pull out the data for this scaffold
  if(nrow(bounds[bounds$Chrom %in% plot.scaffs[i],])>0){ #sanity check
    chrom.dat<-put.reg[put.reg$Chrom %in% plot.scaffs[i],]
    if(nrow(chrom.dat)>0){
      chrom.dat$plot.min<-as.numeric(as.character(chrom.dat$StartBP))+last.max
      chrom.dat$plot.max<-as.numeric(as.character(chrom.dat$StopBP))+last.max
      new.dat<-rbind(new.dat,chrom.dat)
      #last.max<-max(chrom.dat$plot.pos)+
      #               as.numeric(scaffold.widths[scaffold.widths[,1] %in% scaffs.to.plot[i],2])
    }
    last.max<-last.max+
      as.numeric(bounds[bounds$Chrom %in% plot.scaffs[i],2])
  }else{
    print(paste(plot.scaffs[i], "has no designated width."))
  }
}
#make sure everything is the correct class
new.dat$plot.min<-as.numeric(as.character(new.dat$plot.min))
new.dat$plot.max<-as.numeric(as.character(new.dat$plot.max))

Plot the Fsts

FST outliers in putative gene regions

fw.sig.reg<-read.csv("p4.StacksFWSWOutliers_annotatedByGenome.csv")
all.put.ssc<-as.character(put.reg$Scovelli_geneID)
all.put.ssc<-unlist(lapply(all.put.ssc,strsplit,","))
sig.put.match<-all.put.ssc[all.put.ssc %in% fw.sig.reg$SSCID]

#or use the functions
fw.sig.reg$SNP<-paste(fw.sig.reg$Chr,as.character(fw.sig.reg$BP),sep=".")
stacks.in<-outlier.in.region(fw.sig.reg,put.reg,oChrom="Chr")
put.reg$fst.in<-as.character(stacks.in)

Of the 54 shared outlier SNPs, 27 are in coding regions of the genome. However, 2 shared outliers are in putative gene regions, namely APOL, SPG1

No, they don’t overlap. Maybe they’re in the same LD region?

put.nearby.rad<-outlier.nearby(fw.sig.reg,put.reg,chrom.ld,oChrom = "Chr")

#add the nearby significant rad loci to the put.reg data.frame
put.reg$nearby.rad<-put.nearby.rad
head(put.reg[put.reg$nearby.rad!="NA",])
##     Gene
## 1  ABCB7
## 2 ABLIM3
## 3 ADAM19
## 4 ADAM19
## 6  ADRB2
## 7  ADRB2
##                                                                         Function
## 1                                                                iron metabolism
## 2                                                                  actin-binding
## 3                                                     osteoblast differentiation
## 4                                                     osteoblast differentiation
## 6                                                bone density and mineralization
## 7 bone density and mineralization, tooth organogenesis, craniofacial development
##   Chromsome                Species             Citation
## 1      <NA> Gasterosteus aculeatus    Jones et al 2012b
## 2      <NA> Gasterosteus aculeatus Hohenlohe et al 2010
## 3        IV Gasterosteus aculeatus Hohenlohe et al 2010
## 4        IV Gasterosteus aculeatus Hohenlohe et al 2010
## 6       VII Gasterosteus aculeatus Hohenlohe et al 2010
## 7        IV Gasterosteus aculeatus Hohenlohe et al 2010
##                   Scovelli_geneID Notes Chrom  StartBP   StopBP rad.loci
## 1                 SSCG00000000949  <NA>  LG14  5689975  5711169     <NA>
## 2                 SSCG00000004181  <NA>  LG14 12122996 12144915     <NA>
## 3 SSCG00000004162,SSCG00000007962  <NA>  LG14 11860301 11877333     <NA>
## 4 SSCG00000004162,SSCG00000007962  <NA>  LG14 22306738 22325075     <NA>
## 6                 SSCG00000004169  <NA>  LG14 12061583 12065881     <NA>
## 7                 SSCG00000004169  <NA>  LG14 12061583 12065881     <NA>
##   fst.in                                          nearby.rad
## 1     NA LG14.3289190,LG14.3352883,LG14.3960463,LG14.5029044
## 2     NA            LG14.5029044,LG14.13689654,LG14.18295431
## 3     NA            LG14.5029044,LG14.13689654,LG14.18295431
## 4     NA                                       LG14.18295431
## 6     NA            LG14.5029044,LG14.13689654,LG14.18295431
## 7     NA            LG14.5029044,LG14.13689654,LG14.18295431

So of the 540 gene annotations 229 are within the mean LD range of an outlier, which represent 67 putative freshwater genes. Those genes are: ABCB7, ABLIM3, ADAM19, ADRB2, AFAP1L1, angiotensin II receptor, ANXA6, APOL, AQP3, ARHGEF3, ATP6V0A1, ATP6V1A, AVPR2, BGN, CA, CAII, CAMKK1, CD63, CEBPD, CFTR, cortisol, ECaC, EDA, EFNB1, FBLN1, FERH1, FZD2, HRH2, IGF-I, IGFBP5, IGFBP6, IL12B, KCNH4, KITLG, LEPR, LTB4R, MAPK11, MAPK12, MSX2, mucin, NAKATPase, NBC1, NCX, NHE, NKCC, NR4A1, PDLIM7, PMCA, PODXL, PRKCD, PRL, RDH10, RHOA, RHOGTP8, rtCR2, SCUBE1, SLC26A3, SLC2A13, SPG1, SYNPO, TBX2, TRIM14, UT, VATPase, WNT5A, WNT7B, ZEB1

Compare \(\delta\) -divergence to putative regions

Now for \(\delta\) -divergence

#in the gene
sdd.in<-outlier.in.region(dd.bp.smooth[[1]],put.reg,oBP = "Pos")
put.reg$sdd.in<-sdd.in

The genes ARHGEF3, OSBPL8, RHOGTP8 contain \(\delta\)-divergence outlier regions.

#in the gene
sdd.nb<-outlier.nearby(dd.bp.smooth[[1]],put.reg,chrom.ld,oBP = "Pos")
put.reg$sdd.nb<-sdd.nb

And of the 154 \(\delta\)-divergence outliers, 91.4814815% are in the LD region around putative freshwater genes.

Compare Bayenv outliers to putative regions

#taken directly from fwsw_analysis.R
bf<-read.delim("bayenv/p4.bf.txt",header=T)
bf$SNP<-paste(bf$scaffold,as.numeric(as.character(bf$BP))+1,sep=".")
bf.co<-apply(bf[,5:7],2,quantile,0.99) #focus on Bayes Factors, because of Lotterhos & Whitlock (2015)
temp.bf.sig<-bf[bf$Temp_BF>bf.co["Temp_BF"],c(1,2,4,8,5,9)]
sal.bf.sig<-bf[bf$Salinity_BF>bf.co["Salinity_BF"],c(1,2,4,8,6,9)]
grass.bf.sig<-bf[bf$seagrass_BF>bf.co["seagrass_BF"],c(1,2,4,8,7,9)]
#get the log transformed Bayes Factors
bf$logSal<-log(bf$Salinity_BF)
bf$logTemp<-log(bf$Temp_BF)
bf$logSeagrass<-log(bf$seagrass_BF)

There are 0 overlapping outliers between temperature-, salinity-, and seagrass-associated loci.

But if we only care about salinity ones, there are 91 outliers.

Are any of the Bayenv salinity outliers near the putative freshwater genes?

bfs.in<-outlier.in.region(sal.bf.sig,put.reg,"scaffold")
bfs.nb<-outlier.nearby(sal.bf.sig,put.reg,chrom.ld,"scaffold")

Of the 9053 RAD loci analyzed by Bayenv2 for associations with temperature, 0.3703704% are in putative freshwater genes and 75.5555556% are within the LD neighborhood.

Are any of the Bayenv temperature outliers near putative freshwater genes?

bft.in<-outlier.in.region(temp.bf.sig,put.reg,"scaffold")
bft.nb<-outlier.nearby(temp.bf.sig,put.reg,chrom.ld,"scaffold")

Of the 9053 RAD loci analyzed by Bayenv2 for associations with temperature, 0.5555556% are in putative freshwater genes and 80.1851852% are within the LD neighborhood.

Are any of the loci associated with seagrass density in or near putative freshwater genes?

bfg.in<-outlier.in.region(grass.bf.sig,put.reg,"scaffold")
bfg.nb<-outlier.nearby(grass.bf.sig,put.reg,chrom.ld,"scaffold")

Of the 9053 RAD loci analyzed by Bayenv2 for associations with temperature, 0.1851852% are in putative freshwater genes and 77.962963% are within the LD neighborhood.

Which putative genes contain each of those?

put.reg$bfs.in<-bfs.in
put.reg$bft.in<-bft.in
put.reg$bfg.in<-bfg.in

unique(put.reg[put.reg$bfs.in !="NA","Gene"])
## [1] ARHGEF3 NHE    
## 110 Levels: ABCB7 ABLIM3 ADAM19 ADAMTS10 ADRB2 AE ... ZEB1
unique(put.reg[put.reg$bft.in !="NA","Gene"])
## [1] APOL   NHE    TRIM14
## 110 Levels: ABCB7 ABLIM3 ADAM19 ADAMTS10 ADRB2 AE ... ZEB1
unique(put.reg[put.reg$bfg.in !="NA","Gene"])
## [1] ARHGEF3
## 110 Levels: ABCB7 ABLIM3 ADAM19 ADAMTS10 ADRB2 AE ... ZEB1

All three BayEnv tests identified outliers in the genes . The temperature and grass also share NA, and temperature and salinity share NHE.

Jost’s D

Jost’s D measures the fraction of allelic differentiation between populations. According to Molecular Ecologist, “if allelic differentiation at a particular locus is the value of interest, it appears that D is the best measure”, so it might be useful to look at Jost’s D in the putatively freshwater genes.

jost.in<-outlier.in.region(jd.bp.smooth[[1]],put.reg,oChrom="Chrom",oBP="Pos")
jost.nb<-outlier.nearby(jd.bp.smooth[[1]],put.reg,chrom.ld,oChrom = "Chrom",oBP="pos")
put.reg$jostd.nb<-jost.nb
put.reg$jostd.in<-jost.in

So \(1\) loci are located within putative freshwater genes, but \(0\) are within the LD region of the putative genes, representing \(0\) of the putative gene annotations and \(0\) genes.

Combining all of those analyses

 kable(unique(put.reg[put.reg$fst.in != "NA",c("Gene","Chrom")]),caption = "Putative genes containing Fst outliers",row.names = FALSE)
Putative genes containing Fst outliers
Gene Chrom
APOL LG14
SPG1 LG1
 kable(unique(put.reg[put.reg$sdd.in != "NA",c("Gene","Chrom")]),
       caption="Putative genes containing delta divergence outliers",row.names = FALSE)
Putative genes containing delta divergence outliers
Gene Chrom
ARHGEF3 LG13
OSBPL8 LG17
RHOGTP8 LG15
RHOGTP8 LG20
 kable(unique(put.reg[put.reg$jostd.in !="NA",c("Gene","Chrom")]),
       caption="Putative genes containing Jost's D outliers",row.names = FALSE)
Putative genes containing Jost’s D outliers
Gene Chrom
NHE LG11
 kable(unique(put.reg[put.reg$bfs.in != "NA",c("Gene","Chrom")]),
       caption="Putative genes containing salinity-associated SNPs",row.names = FALSE)
Putative genes containing salinity-associated SNPs
Gene Chrom
ARHGEF3 LG14
NHE LG11
 kable(unique(put.reg[put.reg$fst.in != "NA",c("Gene","Chrom")])[
                            unique(put.reg[put.reg$fst.in != "NA","Gene"]) %in% unique(put.reg[put.reg$sdd.in != "NA","Gene"])&
                              unique(put.reg[put.reg$fst.in != "NA","Gene"]) %in% unique(put.reg[put.reg$bfs.in != "NA","Gene"])],
       caption="Putative genes containing all of those outliers",row.names=FALSE)

Table: Putative genes containing all of those outliers

 kable(unique(put.reg[put.reg$fst.in != "NA",c("Gene","Chrom")])[
                            unique(put.reg[put.reg$fst.in != "NA","Gene"]) %in% unique(put.reg[put.reg$sdd.in != "NA","Gene"])&
                              unique(put.reg[put.reg$fst.in != "NA","Gene"]) %in% unique(put.reg[put.reg$jostd.in != "NA","Gene"])],
       caption="Putative genes containing divergence outliers (Fst, deltad, D)",row.names = FALSE)

Table: Putative genes containing divergence outliers (Fst, deltad, D)

 kable(unique(put.reg[put.reg$nearby.rad != "NA","Gene"])[
                            unique(put.reg[put.reg$nearby.rad != "NA","Gene"]) %in% unique(put.reg[put.reg$sdd.nb != "NA","Gene"])&
                              unique(put.reg[put.reg$nearby.rad != "NA","Gene"]) %in% unique(put.reg[put.reg$jostd.in!= "NA","Gene"])],
       caption="Putative genes near divergence outliers (Fst, deltad, D)",row.names=FALSE)
Putative genes near divergence outliers (Fst, deltad, D)
x
NHE

But looking at put.reg[put.reg$Gene =="RHOGTP8","rad.loci"], it’s obvious that Rho GTPase-activating proteins are distributed throughout the genome (on chromosomes LG11, LG15, LG20, LG14, LG1, scaffold_1578, LG12, LG9, LG8, LG22, LG18, LG5, scaffold_1008, LG6, scaffold_950, LG10, LG7, scaffold_139, LG21, LG16, LG19).

Figure 6

Find genomic regions with high \(\pi\) and low \(\delta\)-divergence

#Do high pi and het have low deltad?
#do it per chrom
balancing.sel<-function(pi.out,pi,ht.out,ht,
                        dd.out,dd,jd.out,jd){
  pi.upp<-pi.out[pi.out$direction=="upper",]
  ht.upp<-ht.out[ht.out$direction=="upper",]
  dd.low<-dd.out[dd.out$direction=="lower",]
  jd.low<-jd.out[jd.out$direction=="lower",]
  shared.div<-pi.upp[pi.upp[,pi] %in% ht.upp[,ht],] 
  shared.dif<-dd.low[dd.low[,dd] %in% jd.low[,jd],] 
  bal<-shared.div[shared.div[,pi] %in% shared.dif[,dd]]
  return(list(div=shared.div,dif=shared.dif,bal=bal))
}

bal<-balancing.sel(pi.pp.smooth[[1]],"plot.pos",ht.pp.smooth[[1]],"plot.pos",
                   dd.pp.smooth[[1]],"plot.pos",jd.pp.smooth[[1]],"plot.pos")


shared.upp<-bal$div

There are loci with high \(\pi\) and low \(\delta\)-divergence on 13 chromosomes.

Instead of the lumped marine-freshwater \(F_{ST}\) values that I used originally, I’m going to plot the average of pairwise \(F_{ST}\) values.

fst.means<-fwsw.al
for(i in 1:nrow(fst.means)){
  if(fst.means$Locus.ID[i] %in% fwsw.fl$Locus.ID &
     fst.means$Locus.ID[i] %in% fwsw.la$Locus.ID &
     fst.means$Locus.ID[i] %in% fwsw.tx$Locus.ID){
      mu<-mean(fwsw.fl$Corrected.AMOVA.Fst[fwsw.fl$Locus.ID==fst.means$Locus.ID[i]],
               fwsw.la$Corrected.AMOVA.Fst[fwsw.la$Locus.ID==fst.means$Locus.ID[i]],
               fwsw.al$Corrected.AMOVA.Fst[fwsw.al$Locus.ID==fst.means$Locus.ID[i]],
               fwsw.tx$Corrected.AMOVA.Fst[fwsw.tx$Locus.ID==fst.means$Locus.ID[i]])
      fst.means$Corrected.AMOVA.Fst[i]<-mu
  }
  else{
    fst.means$Corrected.AMOVA.Fst[i]<-NA
  }
}
fst.means<-fst.means[!is.na(fst.means$Corrected.AMOVA.Fst),]
fwsw<-fst.means
fst.points<-FALSE
#and putative genes
put.genes<-read.delim("putative_genes.txt",header=TRUE,sep='\t')
#genome annotations
#put.reg<-read.delim("putative.gene.regions.tsv",header=T)
chrom.ld<-tapply(ld[ld$D.>0.5,"pos.diff"],ld[ld$D. >0.5,"Chrom"],mean)
#select genes of interest
#fav.genes<-c("AQP3","TNS1","CAMKK1","mucin","CAII","NAKATPase","ARHGEF3")
#fav.genes<-c("TNS1","CAII","TRIM14","VATPase")
fav.genes<-c("ARHGEF3", "mucin", "NHE", "TAAR")
genes2plot<-put.reg[put.reg$Gene %in% fav.genes,]
genes2plot$Gene<-as.character(genes2plot$Gene)
genes2plot$Gene[genes2plot$Gene=="ARHGEF3"]<-"ARHGEF" #rename
#shift LG3 names
genes2plot$StartBP[genes2plot$Chrom=="LG3" & genes2plot$Gene=="ARHGEF"]<-
  genes2plot$StartBP[genes2plot$Chrom=="LG3" & genes2plot$Gene=="ARHGEF"]+200000
genes2plot$StartBP[genes2plot$Chrom=="LG3" & genes2plot$Gene=="mucin"]<-
  genes2plot$StartBP[genes2plot$Chrom=="LG3" & genes2plot$Gene=="mucin"]-100000
#shift LG7 names
genes2plot$StartBP[genes2plot$Chrom=="LG7" & genes2plot$Gene=="ARHGEF"]<-
  genes2plot$StartBP[genes2plot$Chrom=="LG7" & genes2plot$Gene=="ARHGEF"]+200000
genes2plot$StartBP[genes2plot$Chrom=="LG7" & genes2plot$Gene=="NHE"]<-
  genes2plot$StartBP[genes2plot$Chrom=="LG7" & genes2plot$Gene=="NHE"]-200000
#shift LG14 names
genes2plot$StartBP[genes2plot$Chrom=="LG14" & genes2plot$Gene=="ARHGEF"]<-
  genes2plot$StartBP[genes2plot$Chrom=="LG14" & genes2plot$Gene=="ARHGEF"]+550000
genes2plot$StartBP[genes2plot$Chrom=="LG14" & genes2plot$Gene=="NHE"]<-
  genes2plot$StartBP[genes2plot$Chrom=="LG14" & genes2plot$Gene=="NHE"]-600000
#get rid of the final ARHGEF
genes2plot<-genes2plot[-which(genes2plot$Chrom=="LG14" & genes2plot$Gene=="ARHGEF" & genes2plot$StartBP>23022883),]

#shared Fst outliers
fw.sig.reg<-read.csv("p4.StacksFWSWOutliers_annotatedByGenome.csv")
h.pi.name<-"HandPi_subgenes.png"
row.settings<-c(4,4)
chroms2plot<-unique(shared.upp$Chr)

#generate xlims
xlims<-lapply(chroms2plot,function(lg,ld,genes,vcf){
  xs<-c(min(genes[genes$Chr == lg,"plot.pos"]-ld[lg]),
        max(genes[genes$Chr == lg,"plot.pos"]+ld[lg]))
  if(xs[1] < 0){
    xs[1]<-0
  }
  if(xs[2] > max(vcf$POS[vcf$`#CHROM`==lg])){
    xs[2]<-max(vcf$POS[vcf$`#CHROM`==lg])
  }
  return(xs)
},ld=chrom.ld,genes=shared.upp,vcf=vcf)
#or do full chroms
#generate xlims
xlims<-lapply(chroms2plot,function(lg,genes,vcf){
  xs<-c(min(vcf$POS[vcf$`#CHROM`==lg]),
        max(vcf$POS[vcf$`#CHROM`==lg]))
  return(xs)
},genes=shared.upp,vcf=vcf)
names(xlims)<-chroms2plot
HandPiName<-expression(Large~pi~and~italic(H))
includeArrows<-FALSE
#colors
comp.col<-c(Het="#80cdc1",pi="#018571",Fst="black",D="#a6611a",deltad="#dfc27d")
#find the plotting location
nearest.pos<-function(stat.df,s.pos,s.stat,target,t.pos,t.stat){
  near.pos<-t(apply(stat.df,1,function(df,target){
    x<-as.numeric(df[s.pos])
    pos<-target[which.min(abs(x-as.numeric(target[,t.pos]))),t.pos]
    if(pos==x){
      stat<-as.numeric(target[which.min(abs(x-as.numeric(target[,t.pos]))),t.stat])
    }else{
      if(pos>x){
        upp<-which.min(abs(x-target[,t.pos]))
        low<-upp-1
      }else{
        low<-which.min(abs(x-target[,t.pos]))
        upp<-low+1
      }
      upp.pt<-target[upp,c(t.pos,t.stat)]
      low.pt<-target[low,c(t.pos,t.stat)]
      slope<-as.numeric((upp.pt[2]-low.pt[2])/(upp.pt[1]-low.pt[1]))
      b<-as.numeric(upp.pt[2]-(slope*upp.pt[1]))
      stat<-slope*x+b
    }
    return(cbind(x,stat))
  },target=target))
  return(near.pos)
}
upp.low.pts<-function(smooth,target,chrom,color,stat,pos.name,...){
  #smooth== smooth[[1]]
  #target== smooth[[2]]
  upp<-smooth[smooth[,"Chrom"]%in% chrom & smooth$direction=="upper",]
  low<-smooth[smooth[,"Chrom"]%in% chrom & smooth$direction=="lower",]
  upp.pts<-nearest.pos(stat.df=upp,s.pos=pos.name,s.stat=stat,
                          target=target[target[,"chr"]%in% chrom,],
                          t.pos="pos",t.stat="smoothed.stats")
  low.pts<-nearest.pos(stat.df=low,s.pos=pos.name,s.stat=stat,
                          target=target[target[,"chr"]%in% chrom,],
                          t.pos="pos",t.stat="smoothed.stats")
  points(x=upp.pts[,1],y=upp.pts[,2],pch=24,bg=color,col=color,...)
  points(x=low.pts[,1],y=low.pts[,2],pch=25,bg=color,col=color,...)
}
png(h.pi.name,height=8,width=14,units="in",res=300)
par(mfrow=row.settings,oma=c(1,1,1,1),mar=c(1,2,2,1))
for(i in 1:length(chroms2plot)){
  this.df<-fwsw[fwsw$Chr %in% chroms2plot[i],]
  this.xlim<-xlims[[as.character(chroms2plot[i])]]
  plot(this.df$BP,this.df$Corrected.AMOVA.Fst, xlim=this.xlim,ylim=c(-0.2,0.5),axes=F,ylab="",xlab="",type='n')
  xmin<-this.xlim[1]#min(pi.plot$Pos[pi.plot$Chrom%in%chroms2plot[i]])
  xmax<-this.xlim[2]#max(pi.plot$Pos[pi.plot$Chrom%in%chroms2plot[i]])
  
  #the shared peaks
  p<-lapply(shared.upp$Pos[shared.upp$Chrom %in% chroms2plot[i]],
         function(pos){
           points(y=c(-0.2,0.5),x=c(pos,pos),
                  type="l",col=alpha("#543005",0.75),cex=2,lwd=4)
         })
  
  points(y=rep(c(-0.2,0.5),length(shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]])),
         x=c(shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]],
             shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]]),
         type="l",col=alpha("#543005",0.75),cex=2,lwd=4)
  #putative gene regions
  g<-genes2plot[genes2plot$Chrom %in% chroms2plot[i] & 
                  genes2plot$StartBP >= xmin & genes2plot$StartBP <= xmax,]
  a<-put.reg[put.reg$Chrom %in% chroms2plot[i] & !(put.reg$Gene %in% fav.genes),]
  
  if(nrow(g) > 0){
    rect(xleft=as.numeric(g$StartBP),xright=as.numeric(g$StopBP),
       ybottom=-0.2,ytop=0.44,col="indianred",border="indianred")
  }
  
  #Fst
  if(fst.points==TRUE){
    points(this.df$BP,this.df$Corrected.AMOVA.Fst,pch=19,cex=1.5,
           col=alpha(col=comp.col["Fst"],0.25),bg=alpha(col=comp.col["Fst"],0.25))
  }
  #Pi
  points(pi.bp.smooth[[2]][pi.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
       type="l",lwd=2,col=comp.col["pi"])
  if(isTRUE(includeArrows)){
    upp.low.pts(smooth=pi.bp.smooth[[1]],target=pi.bp.smooth[[2]],chrom=chroms2plot[i],
                color=comp.col["pi"],stat="Pi",pos.name="Pos")
  }
  #Het
  points(ht.bp.smooth[[2]][ht.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
         type="l",col=comp.col["Het"],lwd=2)
  if(isTRUE(includeArrows)){
    upp.low.pts(smooth=ht.bp.smooth[[1]],target=ht.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["Het"],stat="Het",pos.name="Pos")
  }
  #deltad
  points(dd.bp.smooth[[2]][dd.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
         type="l",col=comp.col["deltad"],lwd=2)
  if(isTRUE(includeArrows)){
    upp.low.pts(smooth=dd.bp.smooth[[1]],dd.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["deltad"],stat="deltad",pos.name="Pos")
  }
  #Josts D
  points(jd.bp.smooth[[2]][jd.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
         type="l",col=comp.col["D"],lwd=2)
  if(isTRUE(includeArrows)){
    upp.low.pts(smooth=jd.bp.smooth[[1]],jd.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["D"],stat="D",pos.name="Pos")
  }
  #shared Fst outliers
  points(this.df$BP[this.df$BP %in% fw.sig.reg$BP],
         rep(0.5,length(this.df$BP[this.df$BP %in% fw.sig.reg$BP])),
         pch="|",cex=2,col="orchid4",lwd=3)
  
  if(nrow(g)>0){
    txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
    txt.locs<-txt.locs[order(txt.locs$starts),]
    text(x=txt.locs$starts,y=0.35,cex=2,labels=txt.locs$name,srt=90,xpd=T)
  }
  #axes etc
   axis(1,pos=-0.2,c(xmin,xmax),
       labels = c(round((xmin/1000000),2),round((xmax/1000000),2)),cex.axis=2)
  axis(2,las=1,hadj=0.75,cex.axis=2,pos=xmin,at=c(-0.25,0,0.25,0.5))
  mtext(chroms2plot[i],1,cex=2*0.75,line=1)
}
#par(fig=c(0, 1, 0, 1), oma=c(0, 0, 0, 0),mar=c(0, 0, 0, 0), new=TRUE)
plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
legend("topleft",xpd=TRUE,
       legend=c("Putative Gene",
                expression(Shared~italic(F)[ST]~Outlier),
                HandPiName),
       bty='n',pch=c(15,124,15),#lwd=c(2,1,4,2,2,0,2,2),lty=c(1,0,1,1,1,0,1,1),
       cex = 2,x.intersp = 0.5,y.intersp = 0.75,text.width=0.45,
       col=c("indianred","orchid4",alpha("#543005",0.75)))
plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
legend("topleft",xpd=TRUE,
       legend=c(expression(italic(H)),
                expression(pi),#expression(FW-SW~italic(F)[ST]),
                expression("Jost's"~italic(D)),
                expression(delta~-divergence)),
       bty='n',pch=c(15,15,15,15),#lwd=c(2,1,4,2,2,0,2,2),lty=c(1,0,1,1,1,0,1,1),
       cex = 2,x.intersp = 0.5,y.intersp = 0.75,text.width=0.45,
       col=c(comp.col[1:2],#alpha(col=comp.col["Fst"],0.25)
             comp.col[4:5]))
dev.off()
## png 
##   2
Figure 6. Low \delta-divergence and high \pi

Figure 6. Low \(\delta\)-divergence and high \(\pi\)

Focus on LG8 to demonstrate the pattern

To demonstrate these patterns, let’s focus on the clearest signal, on LG8. I’ll do three plots, one for pi and H, one for \(\delta\)-divergence and Jost’s D, and one with the four FWSW Fst comparisons.

## SETUP
comp.col<-c(Het="#80cdc1",pi="#018571",Fst="black",D="#a6611a",deltad="#dfc27d")
this.chrom<-"LG8"
i<-which(chroms2plot==this.chrom) #LG8 is the second in the 'chroms2plot' list
this.df<-fwsw[fwsw$Chr %in% chroms2plot[i],]
this.xlim<-xlims[[as.character(chroms2plot[i])]]
xmin<-this.xlim[1]
xmax<-this.xlim[2]

# Find the shared peak
pos<-shared.upp$Pos[shared.upp$Chrom %in% chroms2plot[i]]
regmin<-pos-mean(ld$pos.diff[ld$Chrom==this.chrom & ld$LocIDA==pos & ld$D. > 0.5])
regmax<-pos+mean(ld$pos.diff[ld$Chrom==this.chrom & ld$LocIDA==pos & ld$D. > 0.5])


# Find the putative gene regions
g<-put.reg[put.reg$Chrom == this.chrom,c("Gene","Chrom","StartBP","StopBP")]
#remove duplicates
if(this.chrom=="LG8"){
  g<-g[g$Gene !="ATP6V1A",] 
  g<-g[-which(g$Gene=="VATPase" & g$StartBP<3000000),]
}
g<-g[!duplicated(g),]

txt.locs<-data.frame(starts=unique(g$StartBP),name=as.character(g$Gene[!duplicated(g$StartBP)]),
                     stringsAsFactors = FALSE)
txt.locs<-txt.locs[order(txt.locs$starts),]
rownames(txt.locs)<-NULL
#move the labels
if(this.chrom=="LG8"){
  txt.locs$starts[12]<-txt.locs$starts[12]+100000 #VATPase
  txt.locs$starts[10]<-txt.locs$starts[10]-100000 #MKP8
  txt.locs$starts[11]<-txt.locs$starts[11]-100000 #mucin
  txt.locs$starts[txt.locs$name=="RHOGTP8"]<-txt.locs$starts[txt.locs$name=="RHOGTP8"]+50000 #RhoGTP
  txt.locs$starts[5]<-txt.locs$starts[5]+50000 #PRL2
  
  txt.locs$starts[1]<-txt.locs$starts[1]+90000 #TRIM14
  txt.locs$starts[2]<-txt.locs$starts[2]+100000 #VATPase
  txt.locs$starts[3]<-txt.locs$starts[3]+100000 #PRL1
  #rename
  txt.locs$name[txt.locs$name=="ATP6V0A1"]<-"VATPase"
  txt.locs$name[txt.locs$name=="RHOGTP8"]<-"RhoGTP"
}
## PLOT
png(paste(this.chrom,"_balancingsel.png",sep=""),height=6,width=10,units="in",res=300)
par(mfrow=c(3,1),oma=c(2,2,2,1),mar=c(1,2,1.5,2),xpd=TRUE)
# Empty plot for H and pi
plot(this.df$BP,this.df$Corrected.AMOVA.Fst, xlim=this.xlim,ylim=c(0,0.5),axes=F,ylab="",xlab="",type='n')
# Add the shared peak
points(y=c(0,0.5),x=c(pos,pos),type="l",col=alpha("#543005",0.75),cex=2,lwd=4)
rect(regmin,0,regmax,0.5,col=alpha("#543005",0.15),border=NA) #LD region
# Add the putative gene regions
rect(xleft=as.numeric(g$StartBP),xright=as.numeric(g$StopBP),
     ybottom=0,ytop=0.5,col="indianred",border="indianred")
# Add Pi
points(pi.bp.smooth[[2]][pi.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
     type="l",lwd=2,col=comp.col["pi"])
#upp.low.pts(smooth=pi.bp.smooth[[1]],target=pi.bp.smooth[[2]],chrom=chroms2plot[i],
#            color=comp.col["pi"],stat="Pi",pos.name="Pos")
# Add Het
points(ht.bp.smooth[[2]][ht.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
       type="l",col=comp.col["Het"],lwd=2)
#upp.low.pts(smooth=ht.bp.smooth[[1]],target=ht.bp.smooth[[2]],chrom=chroms2plot[i],
#            color=comp.col["Het"],stat="Het",pos.name="Pos")  
# Add putative gene name
#text(x=txt.locs$starts[4:12],y=c(0.25,rep(0.35,3),rep(0.25,5)),
#     cex=2,labels=txt.locs$name[4:12],srt=90,xpd=T,font=2)
#text(x=txt.locs$starts[1:3],y=c(0.43,0.3,0.05),cex=2,labels=txt.locs$name[1:3],xpd=T,font=2,pos=2)
# axes etc
axis(1,pos=0,c(xmin,xmax),labels = FALSE)
axis(2,las=1,cex.axis=2,pos=xmin,at=c(0,0.25,0.5))
legend(0,0.65,legend=c(expression(italic(H)),expression(pi)),
       bty='n',lwd=c(2,2),lty=c(1,1),xpd=TRUE,ncol=2,
       cex = 2,col=c(comp.col["Het"],comp.col["pi"]),x.intersp = 0.2)

# Empty plot for D and delta-divergence
plot(this.df$BP,this.df$Corrected.AMOVA.Fst, xlim=this.xlim,ylim=c(-0.1,0.2),axes=F,ylab="",xlab="",type='n')
# Add the shared peak
points(y=c(-0.1,0.2),x=c(pos,pos),type="l",col=alpha("#543005",0.75),cex=2,lwd=4)
rect(regmin,-0.1,regmax,0.2,col=alpha("#543005",0.15),border=NA) #LD region
# Add the putative gene regions
rect(xleft=as.numeric(g$StartBP),xright=as.numeric(g$StopBP),
     ybottom=-0.1,ytop=0.2,col="indianred",border="indianred")
# Add deltad
points(dd.bp.smooth[[2]][dd.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
       type="l",col=comp.col["deltad"],lwd=2)
#upp.low.pts(smooth=dd.bp.smooth[[1]],dd.bp.smooth[[2]],chrom=chroms2plot[i],
#            color=comp.col["deltad"],stat="deltad",pos.name="Pos")
# Add Josts D
points(jd.bp.smooth[[2]][jd.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
       type="l",col=comp.col["D"],lwd=2)
#upp.low.pts(smooth=jd.bp.smooth[[1]],jd.bp.smooth[[2]],chrom=chroms2plot[i],
#            color=comp.col["D"],stat="D",pos.name="Pos")
# axes etc
axis(1,pos=-0.1,c(xmin,xmax),labels = FALSE)
axis(2,las=1,cex.axis=2,pos=xmin,at=c(-0.1,0.05,0.2))
legend(0,0.28,legend=c(expression("Jost's"~italic(D)),expression(delta~-divergence)),
       bty='n',lwd=c(2,2),lty=c(1,1),xpd=TRUE,ncol=2,x.intersp = 0.2,text.width = 1000000,
       cex = 2,col=c(comp.col["D"],comp.col["deltad"]))

# Empty plot for Fsts
plot(this.df$BP,this.df$Corrected.AMOVA.Fst, xlim=this.xlim,ylim=c(0,0.5),axes=F,ylab="",xlab="",type='n')
# Add the shared peak
points(y=c(0,0.5),x=c(pos,pos),type="l",col=alpha("#543005",0.75),cex=2,lwd=4)
rect(regmin,0,regmax,0.5,col=alpha("#543005",0.15),border=NA) #LD region
# Add the putative gene regions
rect(xleft=as.numeric(g$StartBP),xright=as.numeric(g$StopBP),
     ybottom=0,ytop=0.5,col="indianred",border="indianred")
# Add Fsts
points(fwsw.fl$BP[fwsw.fl$Chr==this.chrom],fwsw.fl$Smoothed.AMOVA.Fst[fwsw.fl$Chr==this.chrom],
       type="l",col=grp.colors[6],lwd=2)
points(fwsw.tx$BP[fwsw.tx$Chr==this.chrom],fwsw.tx$Smoothed.AMOVA.Fst[fwsw.tx$Chr==this.chrom],
       type="l",col=grp.colors[1],lwd=2)
points(fwsw.la$BP[fwsw.la$Chr==this.chrom],fwsw.la$Smoothed.AMOVA.Fst[fwsw.la$Chr==this.chrom],
       type="l",col=grp.colors[2],lwd=2)
points(fwsw.al$BP[fwsw.al$Chr==this.chrom],fwsw.al$Smoothed.AMOVA.Fst[fwsw.al$Chr==this.chrom],
       type="l",col=grp.colors[2],lwd=2,lty=2)
# Add putative gene name
if(this.chrom=="LG8"){
  text(x=txt.locs$starts[4:12],y=c(0.25,rep(0.35,7),0.25),
      cex=2,labels=txt.locs$name[4:12],srt=90,xpd=T,font=2)

  text(x=txt.locs$starts[1:3],y=c(0.43,0.3,0.15),cex=2,labels=txt.locs$name[1:3],xpd=T,font=2,pos=2)
}else{
  text(x=txt.locs$starts,y=rep(0.25,length(txt.locs$starts)),
      cex=2,labels=txt.locs$name,srt=90,xpd=T,font=2)
}
# axes etc
axis(1,pos=0,c(xmin,xmax),
   labels = c(round((xmin/1000000),2),round((xmax/1000000),2)),cex.axis=2)
axis(2,las=1,cex.axis=2,pos=xmin,at=c(0,0.25,0.5))
legend(0,0.65,legend=c(expression("TX FWSW"~italic(F)[ST]),
                          expression("LA FWSW"~italic(F)[ST]),
                          expression("AL FWSW"~italic(F)[ST]),
                          expression("FL FWSW"~italic(F)[ST])),
       bty='n',lwd=2,lty=c(1,1,2,1),ncol=4,xpd=TRUE,
       cex = 2,col=c(grp.colors[1],grp.colors[2],grp.colors[2],grp.colors[6]),
       x.intersp = 0.2)

# Add x-axis label
mtext(paste("Position on",this.chrom,"(Mb)"),1,cex=2*0.75,line=1)

dev.off()
## png 
##   2
## png 
##   2
Figure 6. An example of a region with putative balancing selection. The SNP designated by the dark brown bar was in the top 1% of two measures of diversity and in the lower 1% of two measures of divergence, suggesting that this SNP may be in a region experiencing balancing selection. The two measures of diversity are plotted in the top panel: nucleotide diversity, π, in dark teal and observed heterozygosity, H, in light teal. The two measures of divergence are in the middle panel: Jost’s D in dark brown and δ-divergence in light brown. The bottom panel shows the four pairwise freshwater-saltwater FST values for the sites in Texas (solid dark purple), Louisiana (solid light purple), Alabama (dashed light purple), and Florida (solid green). The red vertical lines indicate candidate freshwater adaptation genes and are annotated in the bottom panel. The grey rectangle indicates the dc region around the SNP (see text for details).

Figure 6. An example of a region with putative balancing selection. The SNP designated by the dark brown bar was in the top 1% of two measures of diversity and in the lower 1% of two measures of divergence, suggesting that this SNP may be in a region experiencing balancing selection. The two measures of diversity are plotted in the top panel: nucleotide diversity, π, in dark teal and observed heterozygosity, H, in light teal. The two measures of divergence are in the middle panel: Jost’s D in dark brown and δ-divergence in light brown. The bottom panel shows the four pairwise freshwater-saltwater FST values for the sites in Texas (solid dark purple), Louisiana (solid light purple), Alabama (dashed light purple), and Florida (solid green). The red vertical lines indicate candidate freshwater adaptation genes and are annotated in the bottom panel. The grey rectangle indicates the dc region around the SNP (see text for details).

Look for regions of reduced diversity as evidence of selective sweeps.

upp.pi<-NULL
low.pi<-NULL
upp.het<-NULL
low.het<-NULL
upp.dd<-NULL
low.dd<-NULL
for(i in 1:length(lgs)){
  upp.pi<-rbind(upp.pi,avg.pi.adj[avg.pi.adj$Avg.Stat >= 
                     quantile(avg.pi.adj$Avg.Stat[avg.pi.adj$Chr %in% lgs[i]],0.95) &
                     avg.pi.adj$Chr %in% lgs[i],])
  low.pi<-rbind(low.pi,avg.pi.adj[avg.pi.adj$Avg.Stat <= 
                     quantile(avg.pi.adj$Avg.Stat[avg.pi.adj$Chr %in% lgs[i]],0.05) &
                     avg.pi.adj$Chr %in% lgs[i],])

  upp.het<-rbind(upp.het,avg.het.adj[avg.het.adj$Avg.Stat >= 
                       quantile(avg.het.adj$Avg.Stat[avg.het.adj$Chr %in% lgs[i]],0.95) &
                       avg.het.adj$Chr %in% lgs[i],])
  low.het<-rbind(low.het,avg.het.adj[avg.het.adj$Avg.Stat <= 
                       quantile(avg.het.adj$Avg.Stat[avg.het.adj$Chr %in% lgs[i]],0.05) &
                       avg.het.adj$Chr %in% lgs[i],])

  
}
shared.low<-low.pi[low.pi$plot.pos %in% low.het$plot.pos,]
shared.low$plot.min<-shared.low$Avg.Pos-250000
shared.low$plot.max<-shared.low$Avg.Pos+250000

h.pi.name<-"lowdiv_subgenes.png"
row.settings<-c(4,3)
chroms2plot<-unique(shared.low$Chr)
shared.upp<-shared.low
fst.points<-FALSE
HandPiName<-expression(Low~pi~and~italic(H))
#colors
comp.col<-c(Het="#80cdc1",pi="#018571",Fst="black",D="#a6611a",deltad="#dfc27d")
#find the plotting location
nearest.pos<-function(stat.df,s.pos,s.stat,target,t.pos,t.stat){
  near.pos<-t(apply(stat.df,1,function(df,target){
    x<-as.numeric(df[s.pos])
    pos<-target[which.min(abs(x-as.numeric(target[,t.pos]))),t.pos]
    if(pos==x){
      stat<-as.numeric(target[which.min(abs(x-as.numeric(target[,t.pos]))),t.stat])
    }else{
      if(pos>x){
        upp<-which.min(abs(x-target[,t.pos]))
        low<-upp-1
      }else{
        low<-which.min(abs(x-target[,t.pos]))
        upp<-low+1
      }
      upp.pt<-target[upp,c(t.pos,t.stat)]
      low.pt<-target[low,c(t.pos,t.stat)]
      slope<-as.numeric((upp.pt[2]-low.pt[2])/(upp.pt[1]-low.pt[1]))
      b<-as.numeric(upp.pt[2]-(slope*upp.pt[1]))
      stat<-slope*x+b
    }
    return(cbind(x,stat))
  },target=target))
  return(near.pos)
}
upp.low.pts<-function(smooth,target,chrom,color,stat,pos.name,...){
  #smooth== smooth[[1]]
  #target== smooth[[2]]
  upp<-smooth[smooth[,"Chrom"]%in% chrom & smooth$direction=="upper",]
  low<-smooth[smooth[,"Chrom"]%in% chrom & smooth$direction=="lower",]
  upp.pts<-nearest.pos(stat.df=upp,s.pos=pos.name,s.stat=stat,
                          target=target[target[,"chr"]%in% chrom,],
                          t.pos="pos",t.stat="smoothed.stats")
  low.pts<-nearest.pos(stat.df=low,s.pos=pos.name,s.stat=stat,
                          target=target[target[,"chr"]%in% chrom,],
                          t.pos="pos",t.stat="smoothed.stats")
  points(x=upp.pts[,1],y=upp.pts[,2],pch=24,bg=color,col=color,...)
  points(x=low.pts[,1],y=low.pts[,2],pch=25,bg=color,col=color,...)
}
png(h.pi.name,height=8,width=14,units="in",res=300)
par(mfrow=row.settings,oma=c(1,1,1,1),mar=c(1,2,2,1))
for(i in 1:length(chroms2plot)){
  this.df<-fwsw[fwsw$Chr %in% chroms2plot[i],]
  this.xlim<-xlims[[as.character(chroms2plot[i])]]
  plot(this.df$BP,this.df$Corrected.AMOVA.Fst, xlim=this.xlim,ylim=c(-0.2,0.5),axes=F,ylab="",xlab="",type='n')
  xmin<-this.xlim[1]#min(pi.plot$Pos[pi.plot$Chrom%in%chroms2plot[i]])
  xmax<-this.xlim[2]#max(pi.plot$Pos[pi.plot$Chrom%in%chroms2plot[i]])
  
  #the shared peaks
  p<-lapply(shared.upp$Pos[shared.upp$Chrom %in% chroms2plot[i]],
         function(pos){
           points(y=c(-0.2,0.5),x=c(pos,pos),
                  type="l",col=alpha("#543005",0.75),cex=2,lwd=4)
         })
  
  points(y=rep(c(-0.2,0.5),length(shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]])),
         x=c(shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]],
             shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]]),
         type="l",col=alpha("#543005",0.75),cex=2,lwd=4)
  #putative gene regions
  g<-genes2plot[genes2plot$Chrom %in% chroms2plot[i] & 
                  genes2plot$StartBP >= xmin & genes2plot$StartBP <= xmax,]
  a<-put.reg[put.reg$Chrom %in% chroms2plot[i] & !(put.reg$Gene %in% fav.genes),]
  
  if(nrow(g) > 0){
    rect(xleft=as.numeric(g$StartBP),xright=as.numeric(g$StopBP),
       ybottom=-0.2,ytop=0.44,col="indianred",border="indianred")
  }
  
  #Fst
  if(fst.points==TRUE){
    points(this.df$BP,this.df$Corrected.AMOVA.Fst,pch=19,cex=1.5,
           col=alpha(col=comp.col["Fst"],0.25),bg=alpha(col=comp.col["Fst"],0.25))
  }
  #Pi
  points(pi.bp.smooth[[2]][pi.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
       type="l",lwd=2,col=comp.col["pi"])
  if(isTRUE(includeArrows)){
    upp.low.pts(smooth=pi.bp.smooth[[1]],target=pi.bp.smooth[[2]],chrom=chroms2plot[i],
                color=comp.col["pi"],stat="Pi",pos.name="Pos")
  }
  #Het
  points(ht.bp.smooth[[2]][ht.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
         type="l",col=comp.col["Het"],lwd=2)
  if(isTRUE(includeArrows)){
    upp.low.pts(smooth=ht.bp.smooth[[1]],target=ht.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["Het"],stat="Het",pos.name="Pos")
  }
  #deltad
  points(dd.bp.smooth[[2]][dd.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
         type="l",col=comp.col["deltad"],lwd=2)
  if(isTRUE(includeArrows)){
    upp.low.pts(smooth=dd.bp.smooth[[1]],dd.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["deltad"],stat="deltad",pos.name="Pos")
  }
  #Josts D
  points(jd.bp.smooth[[2]][jd.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
         type="l",col=comp.col["D"],lwd=2)
  if(isTRUE(includeArrows)){
    upp.low.pts(smooth=jd.bp.smooth[[1]],jd.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["D"],stat="D",pos.name="Pos")
  }
  #shared Fst outliers
  points(this.df$BP[this.df$BP %in% fw.sig.reg$BP],
         rep(0.5,length(this.df$BP[this.df$BP %in% fw.sig.reg$BP])),
         pch="|",cex=2,col="orchid4",lwd=3)
  
  if(nrow(g)>0){
    txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
    txt.locs<-txt.locs[order(txt.locs$starts),]
    text(x=txt.locs$starts,y=0.35,cex=2,labels=txt.locs$name,srt=90,xpd=T)
  }
  #axes etc
   axis(1,pos=-0.2,c(xmin,xmax),
       labels = c(round((xmin/1000000),2),round((xmax/1000000),2)),cex.axis=2)
  axis(2,las=1,hadj=0.75,cex.axis=2,pos=xmin,at=c(-0.25,0,0.25,0.5))
  mtext(chroms2plot[i],1,cex=2*0.75,line=1)
}
#par(fig=c(0, 1, 0, 1), oma=c(0, 0, 0, 0),mar=c(0, 0, 0, 0), new=TRUE)
plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
legend("topleft",xpd=TRUE,
       legend=c("Putative Gene",
                expression(Shared~italic(F)[ST]~Outlier),
                HandPiName),
       bty='n',pch=c(15,124,15),#lwd=c(2,1,4,2,2,0,2,2),lty=c(1,0,1,1,1,0,1,1),
       cex = 2,x.intersp = 0.5,y.intersp = 0.75,text.width=0.45,
       col=c("indianred","orchid4",alpha("#543005",0.75)))
plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
legend("topleft",xpd=TRUE,
       legend=c(expression(italic(H)),
                expression(pi),#expression(FW-SW~italic(F)[ST]),
                expression("Jost's"~italic(D)),
                expression(delta~-divergence)),
       bty='n',pch=c(15,15,15,15),#lwd=c(2,1,4,2,2,0,2,2),lty=c(1,0,1,1,1,0,1,1),
       cex = 2,x.intersp = 0.5,y.intersp = 0.75,text.width=0.45,
       col=c(comp.col[1:2],#alpha(col=comp.col["Fst"],0.25)
             comp.col[4:5]))
dev.off()
## png 
##   2
Low pi and low het

Low pi and low het

Test for selective sweeps

To test for selective sweeps, I’m using SweeD, which is a modified version of SweepFinder. Although it allows for multi-threading, I’ve found that I can highly parallelize the analysis by first calculating the genome-wide frequency spectrum from the whole vcf and then running each chromosome separately in the background on a supercomputer. So I need to make separate vcf files per chromosome, with a header line.

header<-"##fileformat=VCFv4.2"
for(i in 1:length(lgs)){
  write.table(header,paste("sweed/",lgs[i],".vcf",sep=""),quote=FALSE,
              col.names=FALSE,row.names=FALSE )
  suppressWarnings(write.table(vcf[vcf$`#CHROM`==lgs[i],-708],paste("sweed/",lgs[i],".vcf",sep=""),
              col.names=TRUE,row.names=FALSE,quote=FALSE,sep='\t',append = TRUE))
}

Then I can run SweeD on each file, specifying the genoem-wide frequency spectrum file and a grid size of 50 - which I’ve done. Now I can start to make sense of the results. SweeD produces output that shows the position, likelihood, and alpha value. The likelihood value is the one we’re primarily interested in, so I’ll plot those genome-wide.

Plot SweeD results

sweed.files<-list.files(path="sweed",pattern="SweeD_Report",full.names=TRUE)
sweed<-do.call(rbind,lapply(sweed.files,function(file){
  swlg<-read.delim(file,skip=2,header=TRUE)
  if(length(grep("lg",file)==TRUE)>0){ #correct lowercase chrom names
    swlg$Chrom<-paste("LG",gsub(".*lg(\\d+)$","\\1",file),sep="")
  }else if(length(grep("LG",file)==TRUE)>0){
    swlg$Chrom<-gsub(".*(LG\\d+)$","\\1",file)
  }else{
    swlg$Chrom<-file
    print("Warning: error parsing chromosome name from filename")
  }
  return(swlg)
}))

#try this
sweed.plot<-fst.plot(sweed,fst.name = "Likelihood", bp.name = "Position",chrom.name = "Chrom", 
                    scaffs.to.plot=plot.scaffs,scaffold.widths = bounds,pch=19,
                    pt.cex=1,axis.size=0)
axis(2,pos=-5000000,las=1)
labs<-tapply(sweed.plot$plot.pos,sweed.plot$Chrom,median)
text(x=labs[lgs],y=-.5,labels=lgn,xpd=TRUE)
abline(h=quantile(sweed.plot$Likelihood,0.99),lty=2,col="cornflowerblue",lwd=2)

There are 11 of 1100 estimated likelihoods by SweeD that exceed the upper 1% quantile. These are distributed on 5 chromosomes: 1, 3, 5, 1, 1

Investigate nearby genes

sweed.out<-sweed[sweed$Likelihood >= quantile(sweed$Likelihood,0.99),]
#need to add a snp for these to work
sweed.out$SNP<-paste(sweed.out$Chrom,sweed.out$Position,sep=".")
sweed.in<-outlier.in.region(sweed.out,put.reg,oBP = "Position",oChrom = "Chrom")
sweed.nb<-outlier.nearby(sweed.out,put.reg,chrom.ld,oBP = "Position",oChrom="Chrom")

0 genes contain SweeD outliers and 49 genes are within the LD distance of the SweeD estimators. However, this is likely because the SweeD likelihood estimates are fairly spread out. So let’s plot those five chromosomes.

chroms2plot<-unique(sweed.out$Chrom)
h.pi.name<-"sweed_perchrom.png"
row.settings<-c(3,2)
shared.upp<-sweed.out
fst.points<-FALSE
xlims<-lapply(chroms2plot,function(lg,genes,vcf){
  xs<-c(min(vcf$POS[vcf$`#CHROM`==lg]),
        max(vcf$POS[vcf$`#CHROM`==lg]))
  return(xs)
},genes=shared.upp,vcf=vcf)
names(xlims)<-chroms2plot
#these are the genes nearby
fav.genes<-c("LEPR","EFNB1","TRIM14","ATP6V0A1","WNT11","angiotensin II receptor",
             "EYA","CA","SGK3","FLT4","MSX2")
genes2plot<-put.reg[put.reg$Gene %in% fav.genes,]
genes2plot$Gene<-as.character(genes2plot$Gene)
genes2plot$Gene[genes2plot$Gene == "angiotensin II receptor"]<-"AGTR1"
## fix the locations of a couple for readability
#trim14
trim14.min<-min(put.reg$StartBP[put.reg$Gene=="TRIM14" & put.reg$Chrom=="LG10"])
trim14.max<-max(put.reg$StopBP[put.reg$Gene=="TRIM14" & put.reg$Chrom=="LG10"])
genes2plot$StartBP[put.reg$Chrom=="LG10"&put.reg$Gene=="TRIM14"]<-trim14.min
genes2plot$StopBP[put.reg$Chrom=="LG10"&put.reg$Gene=="TRIM14"]<-trim14.max



comp.col<-c(Het="#80cdc1",pi="#018571",Fst="black",D="#a6611a",deltad="#dfc27d")
HandPiName<-"SweeD outliers"
png("Sweed.png",height=6,width=10,res=300,units = "in")
nf <- layout(matrix(c(1,1,1,
                      2,3,4,
                      5,6,7), nrow=3, byrow=TRUE),widths=c(1,1,1),heights=c(0.75,1.25,1.25))
# genome-wide
par(oma=c(1,1,1,1),mar=c(1,2,0.5,1))
sweed.plot<-fst.plot(sweed,fst.name = "Likelihood", bp.name = "Position",chrom.name = "Chrom", 
                    scaffs.to.plot=plot.scaffs,scaffold.widths = bounds,pch=19,
                    pt.cex=1,axis.size=0,y.lim=c(0,15))
axis(2,pos=-5000000,las=1,at=c(0,15),ylim=c(0,15),cex.axis=1.5)
mtext("CLR",2,line=1.5)
labs<-tapply(sweed$plot.pos,sweed$Chrom,median)
text(x=labs[lgs],y=-1.75,labels=lgn,xpd=TRUE,cex=1.5)
abline(h=quantile(sweed.plot$Likelihood,0.99),lty=2,col="cornflowerblue",lwd=2)

# add each chrom with outliers
for(i in 1:length(chroms2plot)){
  this.df<-fwsw[fwsw$Chr %in% chroms2plot[i],]
  this.xlim<-xlims[[as.character(chroms2plot[i])]]
  plot(this.df$BP,this.df$Corrected.AMOVA.Fst, xlim=this.xlim,ylim=c(-0.2,0.5),axes=F,ylab="",xlab="",type='n')
  xmin<-this.xlim[1]#min(pi.plot$Pos[pi.plot$Chrom%in%chroms2plot[i]])
  xmax<-this.xlim[2]#max(pi.plot$Pos[pi.plot$Chrom%in%chroms2plot[i]])
  
  #the shared peaks
  p<-lapply(shared.upp$Pos[shared.upp$Chrom %in% chroms2plot[i]],
         function(pos){
           points(y=c(-0.2,0.5),x=c(pos,pos),
                  type="l",col=alpha("#f1b6da",0.75),cex=2,lwd=4)
         })
  
  points(y=rep(c(-0.2,0.5),length(shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]])),
         x=c(shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]],
             shared.upp$plot.pos[shared.upp$Chrom %in% chroms2plot[i]]),
         type="l",col=alpha("#f1b6da",0.75),cex=2,lwd=4)
  #putative gene regions
  g<-genes2plot[genes2plot$Chrom %in% chroms2plot[i] & 
                  genes2plot$StartBP >= xmin & genes2plot$StartBP <= xmax,]
  a<-put.reg[put.reg$Chrom %in% chroms2plot[i] & !(put.reg$Gene %in% fav.genes),]
  
  if(nrow(g) > 0){
    rect(xleft=as.numeric(g$StartBP),xright=as.numeric(g$StopBP),
       ybottom=-0.2,ytop=0.44,col="indianred",border="indianred")
  }
  
  #Fst
  if(fst.points==TRUE){
    points(this.df$BP,this.df$Corrected.AMOVA.Fst,pch=19,cex=1.5,
           col=alpha(col=comp.col["Fst"],0.25),bg=alpha(col=comp.col["Fst"],0.25))
  }
  #Pi
  points(pi.bp.smooth[[2]][pi.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
       type="l",lwd=2,col=comp.col["pi"])
  upp.low.pts(smooth=pi.bp.smooth[[1]],target=pi.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["pi"],stat="Pi",pos.name="Pos")
  #Het
  points(ht.bp.smooth[[2]][ht.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
         type="l",col=comp.col["Het"],lwd=2)
  upp.low.pts(smooth=ht.bp.smooth[[1]],target=ht.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["Het"],stat="Het",pos.name="Pos")
  
  #deltad
  points(dd.bp.smooth[[2]][dd.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
         type="l",col=comp.col["deltad"],lwd=2)
  upp.low.pts(smooth=dd.bp.smooth[[1]],dd.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["deltad"],stat="deltad",pos.name="Pos")

  #Josts D
  points(jd.bp.smooth[[2]][jd.bp.smooth[[2]][,"chr"]%in% chroms2plot[i],c("pos","smoothed.stats")],
         type="l",col=comp.col["D"],lwd=2)
  upp.low.pts(smooth=jd.bp.smooth[[1]],jd.bp.smooth[[2]],chrom=chroms2plot[i],
              color=comp.col["D"],stat="D",pos.name="Pos")
  
  #shared Fst outliers
  points(this.df$BP[this.df$BP %in% fw.sig.reg$BP],
         this.df$Corrected.AMOVA.Fst[this.df$BP %in% fw.sig.reg$BP],
         pch=8,cex=2,col="orchid4",lwd=3)
  #add text for putative genes
  if(i == 1){
    txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
    txt.locs<-txt.locs[order(txt.locs$starts),]
    txt.locs[1,"starts"]<-txt.locs[1,"starts"]-500000
    txt.locs[2,"starts"]<-txt.locs[2,"starts"]+500000
    text(x=txt.locs$starts,y=0.35,cex=1.5,labels=txt.locs$name,srt=90,xpd=T)  
  }else if(i==2){
    txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
    txt.locs<-txt.locs[order(txt.locs$starts),]
    txt.locs[3,"starts"]<-mean(txt.locs[3:5,"starts"])
    txt.locs<-txt.locs[1:3,]
    text(x=txt.locs$starts,y=0.35,cex=1.5,labels=txt.locs$name,srt=90,xpd=T) 
  }else if(i==4){
    txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
    txt.locs<-txt.locs[order(txt.locs$starts),]
    txt.locs[1,"starts"]<-txt.locs[1,"starts"]+75000
    text(x=txt.locs$starts[-4],y=0.35,cex=1.5,labels=txt.locs$name[-4],srt=90,xpd=T)  
    text(x=txt.locs$starts[4],y=0,cex=1.5,labels=txt.locs$name[4],srt=90,xpd=T)  
  }else{
    txt.locs<-data.frame(starts=unique(g$StartBP),name=g$Gene[!duplicated(g$StartBP)])
    txt.locs<-txt.locs[order(txt.locs$starts),]
    #txt.locs<-txt.locs[-4,]
    text(x=txt.locs$starts,y=0.35,cex=1.5,labels=txt.locs$name,srt=90,xpd=T) 
  }
  
  #axes etc
   axis(1,pos=-0.2,c(xmin,xmax),
       labels = c(round((xmin/1000000),2),round((xmax/1000000),2)),cex.axis=1.5)
  axis(2,las=1,hadj=0.75,cex.axis=1.5,pos=xmin,at=c(-0.25,0,0.25,0.5))
  mtext(chroms2plot[i],1,cex=1.5*0.75,line=1)
}

# legend
plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
legend("topleft",xpd=TRUE,
       legend=c("Putative Gene",
                expression(Shared~italic(F)[ST]~Outlier),
                HandPiName, expression(italic(H)),
                expression(pi),
                expression("Jost's"~italic(D)),
                expression(delta~-divergence)),
       bty='n',pch=c(15,8,15,15,15,15,15),#lwd=c(2,1,4,2,2,0,2,2),lty=c(1,0,1,1,1,0,1,1),
       pt.cex = 2,cex=1.5,x.intersp = 0.5,y.intersp = 0.75,text.width=0.45,
       col=c("indianred","orchid4",alpha("#f1b6da",0.75),comp.col[1:2],comp.col[4:5]))
dev.off()
SweeD Plot

SweeD Plot

Figure 7

I’m highlighting a few of the putative genes that have a bunch of outliers nearby or in them. First is TNS1, which matched three genome annotations but only had one region, on LG1, that contained shared outlier Fsts, delta divergence, Bayenv salinity, were near monophyletic neighbor joining trees.

fst.dfs<-list(fwsw.tx,fwsw.la,fwsw.al,fwsw.fl)
names(fst.dfs)<-c("TX FWSW","LA FWSW","AL FWSW","FL FWSW")
colors=c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6])
alphas=c(0.5,0.5,1,0.5)
ltys=c(1,1,2,1)

Plotting function

The plotting function gene.region.plot accpets a number of parameters.

gene.region.plot<-function(chrom,gene,put.reg,vcf,chrom.ld, fst.dfs,deltad=FALSE,D=FALSE,
                           colors="black",lwds=2,alphas=0.5,ltys=1,legend=TRUE,bases="bp", 
                           smooth=FALSE, smooth.loess=TRUE,fst.name="Corrected.AMOVA.FST",txt.cex=1,y.lim=c(0,1),...){
  pstart<-min(as.numeric(as.character(put.reg[put.reg$Gene ==gene & 
                                                     put.reg$Chrom==chrom,"StartBP"])))-(chrom.ld[chrom]/2)
  pstop<-max(as.numeric(as.character(put.reg[put.reg$Gene ==gene & 
                                                    put.reg$Chrom==chrom,"StartBP"])))+(chrom.ld[chrom]/2)
  if(pstart < 0){ pstart<- 0 }
  if(pstop > max(vcf[vcf$`#CHROM`==chrom,"POS"])){ pstop <- max(vcf[vcf$`#CHROM`==chrom,"POS"]) }
  starts<-as.numeric(put.reg[put.reg$Chrom %in% chrom & put.reg$Gene == gene,"StartBP"])
  stops<-as.numeric(put.reg[put.reg$Chrom %in% chrom & put.reg$Gene == gene,"StopBP"])
  
  if(smooth==TRUE){
    #generate the dataframes
    smooth.fsts<-lapply(fst.dfs,function(df){
      this.df<-df[df$Chr %in% chrom,]
      this.smooth<-do.call("rbind",lapply(seq(1,nrow(this.df),(nrow(this.df)*0.15)/5),sliding.avg,
                                        dat=data.frame(Pos=this.df$BP,Fst=this.df[,fst.name]),
                                        width=nrow(this.df)*0.15))
      return(this.smooth)
    })
  }  else{
    smooth.fsts<-lapply(fst.dfs,function(df){
      this.smooth<-df[df$Chr %in% chrom,c("BP",fst.name)]
    })
  }
  
  if(is.data.frame(deltad)){
    this.dd<-deltad[deltad$Chrom %in% chrom,]
    if(smooth.loess==TRUE){
      dd.smooth<-loess.smooth(this.dd$Pos,this.dd$deltad,span=0.1,degree=2)
      names(dd.smooth)<-c("pos","smoothed.stats")
    }else{
      dd.smooth<-this.dd
    }
  }else if(is.list(deltad)){
    dd.smooth<-deltad[[2]][deltad[[2]][,"chr"]%in%chrom,]
  }else{
    dd.smooth<-NULL
  }
  
  if(is.data.frame(D)){
    this.d<-D[D$Chr %in% chrom,]
    if(smooth.loess==TRUE) {
      dsmooth<-loess.smooth(this.d$POS,this.d$D,span=0.1,degree=2)
      names(dd.smooth)<-c("pos","smoothed.stats")
    }else{
      dsmooth<-this.d
    } 
  }else if(is.list(D)){
    dsmooth<-D[[2]][D[[2]][,"chr"]%in%chrom,]
  }else{
    D<-NULL
  }
  
  pr.gene<-put.reg[put.reg$Chrom==chrom & put.reg$Gene==gene,]
  fst.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$rad.loci[pr.gene$rad.loci != "NA"]),","))
  sal.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$bfs.in[pr.gene$bfs.in != "NA"]),","))
 # tmp.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$bft.in[pr.gene$bft.in != "NA"]),","))
  #grs.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$bfg.in[pr.gene$bfg.in != "NA"]),","))
  #jd.gene<-unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$jostd.nb[jostd.nb != "NA"]),","))
  sdd.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$sdd.in[pr.gene$sdd.in != "NA"]),","))
 #nj.gene<-as.numeric(unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$nj.nb[pr.gene$nj.nb != "NA"]),",")))
  nj.gene<-NULL
  nb.gene<-as.numeric(unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$nearby.rad[pr.gene$nearby.rad != "NA"]),",")))
  gene.out<-data.frame(BP=c(as.numeric(fst.gene),as.numeric(sal.gene),as.numeric(sdd.gene)),
                       shape=c(rep(8,length(fst.gene)),rep(1,length(sal.gene)),rep(4,length(sdd.gene))))
  
  if(length(colors) != length(smooth.fsts)){
    colors<-rep(colors,length(smooth.fsts)/length(colors))
  }
  if(length(lwds) != length(smooth.fsts)){
    lwds<-rep(lwds,length(smooth.fsts)/length(lwds))
  }
  if(length(alphas) != length(smooth.fsts)){
    alphas<-rep(alphas,length(smooth.fsts)/length(alphas))
  }
  if(length(ltys) != length(smooth.fsts)){
    ltys<-rep(ltys,length(smooth.fsts)/length(ltys))
  }
  
  plot(smooth.fsts[[1]],type='n',ylim=c(y.lim[1],y.lim[2]+0.02),
       bty="L",xlab="",ylab="",xaxt='n',yaxt='n',xlim=c(pstart,pstop))
  axis(2,las=1,cex.axis=txt.cex)  
  xlabs<-c(pstart,pstop)
  if(bases %in% c("mb","MB","Mb")) xlabs<-c(round((pstart/1000000),2),round((pstop/1000000),2))
  if(bases %in% c("kb","KB","Kb")) xlabs<-c(round((pstart/1000),2),round((pstop/1000),2))
  axis(1,at=c(pstart,pstop),cex.axis=txt.cex,labels = xlabs)
  #add putative gene
  rect(xleft=as.numeric(starts),xright=as.numeric(stops),
         ybottom=y.lim[1]-0.04,ytop=y.lim[2],col="indianred",border="indianred")
  #add fsts
  mtext(chrom,1,cex=0.75*txt.cex,line = 1)
  for(i in 1:length(smooth.fsts)){
    points(smooth.fsts[[i]],col=alpha(colors[i],alphas[i]),type="l",lwd=lwds[i],lty=ltys[i])
  }
  
  #add delta divergence
  if(is.data.frame(dd.smooth)){
    points(dd.smooth$pos,dd.smooth$smoothed.stats,col="#dfc27d",type="l",lwd=2)
  }
  if(is.list(deltad)){
    upp.low.pts(smooth=deltad[[1]],target=deltad[[2]],chrom=chrom,color=comp.col["deltad"],stat="deltad",pos.name="Pos")
  }
  
  #add Jost's D
  if(is.data.frame(dsmooth)){
    points(dsmooth$pos,dsmooth$smoothed.stats,type="l",col="#a6611a",lwd=2)
  }
  if(is.list(D)){
    upp.low.pts(smooth=D[[1]],target=D[[2]],chrom=chrom,color=comp.col["D"],stat="D",pos.name="Pos")
  }
  
  
  #are nj trees shown in scope of fig?
  njs.eval<-lapply(nj.gene,function(x){
    if(x >= pstart & x <= pstop) { eval = TRUE }
    else { eval = FALSE }
    return(eval)
  })
  
  #add delta d
  if(is.data.frame(deltad)){
    lgnd<-unlist(lapply(names(fst.dfs),function(n) { bquote(.(n)~italic(F[ST])) }))
    lgnd<-c(lgnd,expression(delta~-divergence))
  }else{
    lgnd<-unlist(lapply(names(fst.dfs),function(n) { substitute(paste(name,italic(F[ST]),sep=""),list(name=n)) }))

  }
  
  cols<-NULL
  pchs<-rep(32,length(lwds))
  for(i in 1:length(colors)){
    cols<-c(cols,alpha(colors[i],alphas[i]))
  }
  if(!is.null(deltad)){
    cols<-c(cols,"#dfc27d")
    pchs<-c(pchs,32)
    ltys<-c(ltys,1)
    lwds<-c(lwds,2)
  }
  if(!is.null(D)){
    cols<-c(cols,"#a6611a")
    pchs<-c(pchs,32)
    lgnd<-c(lgnd,"Jost's D")
    ltys<-c(ltys,1)
    lwds<-c(lwds,2)
  }
  
  lgnd<-c(lgnd,substitute(italic(g),list(g=gene)),"Outlier RAD loci")
  
  cols<-c(cols,"indianred","black")
  pchs<-c(pchs,15,124)
  if(length(grep(TRUE,njs.eval))>0){
    lgnd<-c(lgnd,"Monophyletic Gene Tree")
    cols<-c(cols,"#08519c")
    pchs<-c(pchs,124)
  }
  
  if(length(sal.gene)>0){
    lgnd<-c(lgnd,"Salinity-associated")
    cols<-c(cols,"black")
    pchs<-c(pchs,25)
  }
  if(legend==TRUE){
  legend("topleft",
         legend=lgnd,pch=pchs,
         bty='n',lwd=c(lwds,0,0,0,0),lty=c(ltys,0,0,0,0),
         col=cols,cex = txt.cex)
  }
  #add outlier loci
  clip(x1=min(as.numeric(pstart)),x2=max(as.numeric(pstop)),y1=y.lim[2]+.01,y2=y.lim[2]+0.2)
  abline(v=fst.gene,col="black")
  points(x=sal.gene,y=rep(y.lim[2]+0.05,length(sal.gene)),col="black",pch=25,cex=0.75*txt.cex,lwd=2)
  abline(v=nb.gene,col=alpha("black",0.5),cex=2)
  abline(v=nj.gene,col="#08519c",cex=2)
}

outside.legend<-function(...){
  opar <- par(fig=c(0, 1, 0, 1), oma=c(0, 0, 0, 0), 
    mar=c(0, 0, 0, 0), new=TRUE)
  on.exit(par(opar))
  plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n')
  legend(...)
}

Plot all genes - for supplement

#set up legend parameters
leg.text<-c(expression(TX~FWSW~italic(F)[ST]),expression(LA~FWSW~italic(F)[ST]),
            expression(AL~FWSW~italic(F)[ST]),expression(FL~FWSW~italic(F)[ST]),
            expression(delta~-divergence),expression("Jost's"~italic(D)),
            expression(Shared~italic(F)[ST]~Outliers),
            "Salinity-associated SNPs")
leg.pchs<-c(rep(32,6),124,25,124)
leg.ltys<-c(1,1,2,1,1,1,0,0,0)
leg.cols<-c(colors,comp.col["deltad"],comp.col["D"],"black","black","indianred")


png("ARGHEF3.png",height=8,width=10,units="in",res=300)       
par(mfrow=c(5,3),oma=c(2,2,3,2),mar=c(2,2,1,1))
arhgef3<-lapply(unique(put.reg[put.reg$Gene=="ARHGEF3" & put.reg$Chrom%in%lgs,"Chrom"]),# & !is.na(put.reg$rad.loci)
       gene.region.plot,gene="ARHGEF3",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
outside.legend("top",legend=c(leg.text,expression(italic(ARHGEF3))),pch=leg.pchs,lty=leg.ltys,lwd=2,
               bty='n',ncol=3,cex=1.25,x.intersp=0,col=leg.cols)
dev.off()

png("mucin.png",height=8,width=10,units="in",res=300)       
par(mfrow=c(5,3),oma=c(2,2,3,2),mar=c(2,2,1,1))
mucin<-lapply(unique(put.reg[put.reg$Gene=="mucin"& put.reg$Chrom%in%lgs,"Chrom"]),# & !is.na(put.reg$rad.loci)
       gene.region.plot,gene="mucin",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
outside.legend("top",legend=c(leg.text,expression(italic(mucin))),pch=leg.pchs,lty=leg.ltys,lwd=2,
               bty='n',ncol=3,cex=1.25,x.intersp=0,col=leg.cols)
dev.off()

png("NHE.png",height=4,width=6,units="in",res=300)       
par(mfrow=c(2,4),oma=c(2,2,3,2),mar=c(2,2,2,2))
nhe<-lapply(unique(put.reg[put.reg$Gene=="NHE" & put.reg$Chrom%in%lgs,"Chrom"]),# & !is.na(put.reg$rad.loci)
       gene.region.plot,gene="NHE",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
outside.legend("top",legend=c(leg.text,expression(italic(NHE))),pch=leg.pchs,lty=leg.ltys,lwd=2,
               bty='n',ncol=3,cex=1.25,x.intersp=0,col=leg.cols)
dev.off()

png("TAAR.png",height=4,width=6,units="in",res=300)       
par(mfrow=c(1,2),oma=c(2,2,3,2),mar=c(2,2,2,2))
taar<-lapply(unique(put.reg[put.reg$Gene=="TAAR" & put.reg$Chrom%in%lgs,"Chrom"]), #& !is.na(put.reg$rad.loci)
       gene.region.plot,gene="TAAR",put.reg=put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
outside.legend("top",legend=c(leg.text,expression(italic(TAAR))),pch=leg.pchs,lty=leg.ltys,lwd=2,
               bty='n',ncol=3,cex=.75,x.intersp=0,col=leg.cols)
dev.off()
Supplmental Fig. 5

Supplmental Fig. 5

Supplemental Fig. 6

Supplemental Fig. 6

Supplemental Fig. 7

Supplemental Fig. 7

Supplemental Fig. 8

Supplemental Fig. 8

Figure 7

colors=c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6])
alphas=c(0.5,0.5,1,0.5)
ltys=c(1,1,2,1)

png("Fig7candidateGenes.png",height=10,width=10,units="in",res=300)

par(mfrow=c(4,3),oma=c(2,2,4.5,2),mar=c(1,3,2,1))
#row1
gene.region.plot("LG1","NHE",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=4500000,y=0.5,expression(italic(NHE)),xpd=T,cex=2)

gene.region.plot("LG5","NHE",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=4500000,y=0.5,expression(italic(NHE)),xpd=T,cex=2)

gene.region.plot("LG11","NHE",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=3000000,y=0.5,expression(italic(NHE)),xpd=T,cex=2)

#row2
gene.region.plot("LG2","mucin",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=4500000,y=0.5,expression(italic(mucin)),xpd=T,cex=2)

gene.region.plot("LG4","TAAR",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=15000000,y=0.5,expression(italic(TAAR)),xpd=T,cex=2)

gene.region.plot("LG7","TAAR",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=3000000,y=0.5,expression(italic(TAAR)),xpd=T,cex=2)

#row3
gene.region.plot("LG7","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=8500000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)

gene.region.plot("LG13","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=2200000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)

gene.region.plot("LG14","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=6500000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)

#row4
gene.region.plot("LG18","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=5500000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)

gene.region.plot("LG19","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=4000000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)

gene.region.plot("LG20","ARHGEF3",put.reg,vcf=vcf,chrom.ld=chrom.ld,
       fst.dfs=fst.dfs,deltad=dd.bp.smooth,D=jd.bp.smooth,colors=colors,alphas=alphas,ltys=ltys,
       smooth=FALSE,smooth.loess=FALSE,fst.name="Smoothed.AMOVA.Fst",bases="MB",legend=FALSE)
text(x=4000000,y=0.5,expression(italic(ARHGEF3)),xpd=T,cex=2)


outside.legend("top",legend=c(expression(TX~FWSW~italic(F)[ST]),expression(LA~FWSW~italic(F)[ST]),
            expression(AL~FWSW~italic(F)[ST]),expression(FL~FWSW~italic(F)[ST]),
            expression(delta~-divergence),expression("Jost's"~italic(D)),
            expression(Shared~italic(F)[ST]~Outliers),
            "Salinity-associated SNPs","FW gene"),
            pch=c(rep(32,6),124,25,124),lty=c(1,1,2,1,1,1,0,0,0),lwd=2,
               bty='n',ncol=5,cex=1.25,x.intersp=0,
            col=c(colors,comp.col["deltad"],comp.col["D"],"black","black","indianred"))
# 
# par(fig = c(0, 1, 0, 1),oma=rep(0, 4), mar=rep(0, 4), new=TRUE)
# plot(0:1.5, 0:1.5, type="n", xlab="", ylab="", axes=FALSE)
# legend(x=0.5,y=0.22,c(expression(Shared~italic(F)[ST]~Outliers),
#                 "Salinity-associated SNPs",
#                 "FW Candidate Gene"),
#        pch=c(124,25,124),lty=c(0,0,0),lwd=2,
#        col=c("black","black","indianred"),
#        bty='n',ncol=1,cex=2,x.intersp=-0.5,xpd=T)
# 
# 
# #add lines to the top
# par(fig = c(0, 1, 0, 1),oma=rep(0, 4), mar=rep(0, 4), new=TRUE)
# plot(0:1, 0:1, type="n", xlab="", ylab="", axes=FALSE)
# legend("top",c(expression(TX~FWSW~italic(F)[ST]),
#                 expression(LA~FWSW~italic(F)[ST]),
#                 expression(AL~FWSW~italic(F)[ST]),
#                 expression(FL~FWSW~italic(F)[ST]),
#                 expression(delta~-divergence)),
#        pch=rep(32,5),lty=c(1,1,1,1,1),lwd=2,text.width=0.16,
#        col=c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6],
#           "#dfc27d"),
#        bty='n',ncol=3,cex=2,x.intersp=0.5,y.intersp=0.75,xpd=T)
dev.off()
Fig. 7

Fig. 7

Figure 8: LG4

LG4 clearly is enriched for outliers, let’s look at it more closely, including putative FW genes.

#define a few things
chrom<-"LG4"
pchs<-c(rep(32,6),15,124,124)
cols<-c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6],
        "#dfc27d","#a6611a","indianred","black","#08519c")
comp.col<-c(Het="#80cdc1",pi="#018571",Fst="black",D="#a6611a",deltad="#dfc27d")
ltys=c(1,1,2,1,1,1,0,0,0)
lwds<-c(rep(2,6),0,0,0)

#put together the data for LG4
#this.dd<-deltad[deltad$Chrom %in% chrom,]
#dd.smooth<-loess.smooth(this.dd$Pos,this.dd$deltad,span=0.1,degree=2)
#add Jost's D
#this.d<-jostd[jostd$Chr %in% chrom,]
#dsmooth<-loess.smooth(this.d$POS,this.d$D,span=0.1,degree=2) 
#get the outliers
pr.gene<-put.reg[put.reg$Chrom==chrom ,]
#rad loci 
fst.gene<- stacks.sig[stacks.sig$Chr==chrom,"BP"]
#none of the salinity-associated RAD loci are in or near genes on LG8, but we could plot them all anyway
sal.gene<-sal.bf.sig[sal.bf.sig$scaffold=="LG8","BP"]
#jost's d - none are in the putative genes
jd.gene<-unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$jost.in[jost.in != "NA"]),","))
#delta divergence outliers- none are in the putative genes
sdd.gene<- unlist(strsplit(gsub(paste(chrom,".",sep=""),"",pr.gene$sdd.in[pr.gene$sdd.in != "NA"]),","))
#put all of them together with their associated shapes
gene.out<-data.frame(BP=c(as.numeric(fst.gene),as.numeric(sal.gene),as.numeric(sdd.gene)),
                     shape=c(rep(8,length(fst.gene)),rep(1,length(sal.gene)),rep(4,length(sdd.gene))))
#add putative genes
g<-put.reg[put.reg$Chrom %in% chrom,]
  g$Gene<-as.character(g$Gene)
  g$Gene[grep("ATP6",g$Gene)]<-"ATP6V" #standardize the names
  g<-g[,c("Gene","StartBP","StopBP")]
  g<-g[!duplicated(g$StartBP),]
  g<-g[order(g$StartBP),]
## Model 1
source("../programs/dmc-master/getMCLE.R")
dmc.pattern<-"p4LG4_100000_2_9_forReal"
positions<-readRDS("dmc/selectedRegionPositions_p4LG4.RDS")
selSite = positions[seq(1, length(positions[positions<10000000]), length.out = 100)]
sels = c(1e-4, 1e-3, 0.01, seq(0.02, 0.14, by = 0.01), seq(0.15, 0.3, by = 0.05), 
             seq(0.4, 0.6, by = 0.1))
times = c(0, 5, 25, 50, 100, 500, 1000, 1e4, 1e6)
migs = c(10^-(seq(5, 1, by = -2)), 0.5, 1)
Ne<-100000
rec=2*10^-9
sources<-c(3,5,7,16)
gs<-c(1/(2*Ne), 10^-(4:1))
compLikelihood_ind<-readRDS(paste("dmc/compLikelihood_ind_",dmc.pattern,".RDS",sep=""))
mcle_ind = getMCLEind(compLikelihood_ind, selSite, sels)
mcle_ind_index<-which(mcle_ind[1]==selSite)
profileLike_sel_ind = sapply(1: length(sels), function(i) {
  max(unlist(compLikelihood_ind[[mcle_ind_index]][[i]]))
})

## Model 3
compLikelihood_sv<-readRDS(paste("dmc/compLikelihood_sv_",dmc.pattern,".RDS",sep=""))
mcle_sv<-getMCLEsv_source(compLikelihood_sv, selSite, sels, gs, times, sources)
mcle_sv_index<-which(mcle_sv[1]==selSite)
profileLike_sel_sv = sapply(1: length(sels), function(i) {
  max(unlist(compLikelihood_sv[[mcle_sv_index]][[i]]))
})

## Model 5
compLikelihood_mixed_svInd<-readRDS(paste("dmc/compLikelihood_mixed_svInd_",dmc.pattern,".RDS",sep=""))
mcle_mixed_svInd<-getMCLEmixed(compLikelihood_mixed_svInd, selSite, sels, gs, times, migs[1], sources)
mcle_mixed_svInd_index<-which(mcle_mixed_svInd[1]==selSite)
profileLike_sel_mixed_svInd = sapply(1: length(sels), function(i) {
  max(unlist(compLikelihood_mixed_svInd[[mcle_mixed_svInd_index]][[i]]))
})

#functions for plotting dmc output
calc.max.complike<-function(pattern,neutral_name=NA){
  #read in composite likelihood files and calculate max for all proposed selected sites
  if(is.na(neutral_name)){
    compLikelihood_neutral = readRDS(paste("dmc/compLikelihood_neutral_",dmc.pattern,".RDS",sep=""))
  }else{
    compLikelihood_neutral = readRDS(neutral_name)
  }
  
  compLikelihood_neutral_site = sapply(1 : length(compLikelihood_neutral), function(i) {
    max(unlist(compLikelihood_neutral[[i]]))
  })
  
  compLikelihood_ind = readRDS(paste("dmc/compLikelihood_ind_",pattern,".RDS",sep=""))
  compLikelihood_ind_site = sapply(1 : length(compLikelihood_ind), function(i) {
    max(unlist(compLikelihood_ind[[i]]))
  })
  
  compLikelihood_mig = readRDS(paste("dmc/compLikelihood_mig_",pattern,".RDS",sep=""))
  compLikelihood_mig_site = sapply(1 : length(compLikelihood_mig), function(i) {
    max(unlist(compLikelihood_mig[[i]]))
  })
  
  compLikelihood_sv = readRDS(paste("dmc/compLikelihood_sv_",pattern,".RDS",sep=""))
  compLikelihood_sv_site = sapply(1 : length( compLikelihood_sv), function(i) {  
    max(unlist(compLikelihood_sv[[i]]))
  })
  
  compLikelihood_mixed_migInd = readRDS(paste("dmc/compLikelihood_mixed_migInd_",pattern,".RDS",sep=""))
  compLikelihood_mixed_migInd_site = sapply(1 : length(compLikelihood_mixed_migInd), function(i) {
    max(unlist(compLikelihood_mixed_migInd[[i]]))
  })
  
  compLikelihood_mixed_svInd = readRDS(paste("dmc/compLikelihood_mixed_svInd_",pattern,".RDS",sep=""))
  compLikelihood_mixed_svInd_site = sapply(1 : length(compLikelihood_mixed_svInd), function(i) {
    max(unlist(compLikelihood_mixed_svInd[[i]]))
  })
  return(list(max.likes=c(ind=(compLikelihood_ind_site - compLikelihood_neutral_site),
           mig=(compLikelihood_mig_site - compLikelihood_neutral_site), 
           sv=(compLikelihood_sv_site - compLikelihood_neutral_site),
           migInd=(compLikelihood_mixed_migInd_site - compLikelihood_neutral_site),
           svInd=(compLikelihood_mixed_svInd_site - compLikelihood_neutral_site)),
         max.complikes=c(neutral=compLikelihood_neutral,ind=compLikelihood_ind,
           mig=compLikelihood_mig, 
           sv=compLikelihood_sv,
           migInd=compLikelihood_mixed_migInd,
           svInd=compLikelihood_mixed_svInd)))
}

plot.complike<-function(pattern,leg=TRUE,lab=TRUE,plot_range = NA,selSite){
  
}
png("LG4.png",width=10,height=7,units="in",res=300)

nf <- layout(matrix(c(1,1,1,1,
                      2,2,2,2,
                      3,3,3,3), nrow=3, byrow=TRUE))

### Plot all of LG4
par(oma=c(2.5,5,2,1),mar=c(1.5,1,2,1))
plot(fst.dfs[[1]]$BP[fst.dfs[[1]]$Chr %in% chrom],
     fst.dfs[[1]]$Smoothed.AMOVA.Fst[fst.dfs[[1]]$Chr %in% chrom],
     type='n',ylim=c(0,1),bty="L",xlab="",ylab="",xaxt='n',yaxt='n')
  axis(2,las=1,cex.axis=1.75)  
  axis(1,cex.axis=1.75,at=seq(0,1.8*10^7,3*10^6),labels = seq(0,18,3))
  #add fsts
  #mtext(chrom,1,cex=2*0.75,line=2.5)
  for(i in 1:length(fst.dfs)){
    points(fst.dfs[[i]]$BP[fst.dfs[[i]]$Chr %in% chrom],
           fst.dfs[[i]]$Smoothed.AMOVA.Fst[fst.dfs[[i]]$Chr %in% chrom],
           col=alpha(colors[i],alphas[i]),type="l",lwd=lwds[i],lty=ltys[i])
  }
  #add delta-d and Jost's D
  #Pi
#  points(pi.bp.smooth[[2]][pi.bp.smooth[[2]][,"chr"]%in% chrom,c("pos","smoothed.stats")],
#       type="l",lwd=2,col=comp.col["pi"])
#  upp.low.pts(smooth=pi.bp.smooth[[1]],target=pi.bp.smooth[[2]],
#              chrom=chrom,color=comp.col["pi"],stat="Pi",pos.name="Pos",cex=1.75)
  #Het
 # points(ht.bp.smooth[[2]][ht.bp.smooth[[2]][,"chr"]%in% chrom,c("pos","smoothed.stats")],
#         type="l",col=comp.col["Het"],lwd=2)
 # upp.low.pts(smooth=ht.bp.smooth[[1]],target=ht.bp.smooth[[2]],
#              chrom=chrom,color=comp.col["Het"],stat="Het",pos.name="Pos",cex=1.75)
  
  #deltad
  points(dd.bp.smooth[[2]][dd.bp.smooth[[2]][,"chr"]%in% chrom,c("pos","smoothed.stats")],
         type="l",col=comp.col["deltad"],lwd=2)
  upp.low.pts(smooth=dd.bp.smooth[[1]],target=dd.bp.smooth[[2]],
              chrom=chrom,color=comp.col["deltad"],stat="deltad",pos.name="Pos",cex=1.75)

  #Josts D
  points(jd.bp.smooth[[2]][jd.bp.smooth[[2]][,"chr"]%in% chrom,c("pos","smoothed.stats")],
         type="l",col=comp.col["D"],lwd=2)
  upp.low.pts(smooth=jd.bp.smooth[[1]],target=jd.bp.smooth[[2]],
              chrom=chrom,color=comp.col["D"],stat="D",pos.name="Pos",cex=1.75)
  
  
   #add the putative genes
  starts<-as.numeric(g[,"StartBP"])
  stops<-as.numeric(g[,"StopBP"])
  rect(xleft=as.numeric(starts),xright=as.numeric(stops),
         ybottom=-0.04,ytop=1,col="indianred",border="indianred")
  #add the ones that are spaced out
  text(x=g$StartBP[!g$Gene %in% c("SCUBE1","TRIM14")],y=0.5,
       labels=g$Gene[!g$Gene %in% c("SCUBE1","TRIM14")],font=2,srt=90,xpd=T,cex=1.75)
  text(x=g$StartBP[g$Gene == "SCUBE1"]-50000,y=0.8,
       labels=g$Gene[g$Gene =="SCUBE1"],font=2,srt=90,xpd=T,cex=1.75)
  text(x=g$StartBP[g$Gene == "TRIM14"]+50000,y=0.2,
       labels=g$Gene[g$Gene =="TRIM14"],font=2,srt=90,xpd=T,cex=1.75)
  #add outlier loci
  clip(x1=min(as.numeric(fst.dfs[[1]]$BP[fst.dfs[[1]]$Chr %in% chrom])),
       x2=max(as.numeric(fst.dfs[[1]]$BP[fst.dfs[[1]]$Chr %in% chrom])),y1=0.99,y2=1.06)
  abline(v=fst.gene,col="black",lwd=1.5)
  points(x=sal.gene,y=rep(1.05,length(sal.gene)),col="black",pch=25,cex=1)
  

### Plot the recombination rate
load("../../scovelli_genome/ssc_map.rda")
rec.cols<-c("#fdbb84","#fc8d59","#ef6548")
plot(set[["LG4"]]@interpolations$slidingwindow@physicalPositions,
     set[["LG4"]]@interpolations$slidingwindow@rates,type="l",bty="L",
     ylim=c(-5,25),xlab="",ylab="",xaxt='n',yaxt='n',lwd=2,col=rec.cols[1])
abline(h=0,lty=2)
axis(2,cex.axis=1.75,las=1)
axis(1,at=seq(0,1.8*10^7,3*10^6),labels = seq(0,18,3),cex.axis=1.75)
mtext("Recombination Rate\n(cM/Mb)",2,line=3,cex=1.5*0.75)
lines(set[["LG4"]]@interpolations$spline@physicalPositions,
      set[["LG4"]]@interpolations$spline@rates,type="l",col=rec.cols[2],lwd=2)
lines(set[["LG4"]]@interpolations$loess@physicalPositions,
      set[["LG4"]]@interpolations$loess@rates,type="l",lwd=2,col=rec.cols[3])
legend("topleft",bty='n',col=rec.cols,
       lwd=2,c("Sliding Window","Spline","Loess"),cex=1.5)

### Plot dmc results
max.complikes<-calc.max.complike(dmc.pattern)
max.likes<-max.complikes[[1]]
plot_range = range(max.likes)
plot(selSite, max.likes[grep("ind",names(max.likes))], type = "b",bty="l",
       ylim = c(plot_range[1] - 50, plot_range[2] + 50),
     xlim=range(fst.dfs[[1]]$BP[fst.dfs[[1]]$Chr %in% chrom]),
     xlab = "x.lab",ylab = "",cex=2,las=1,cex.axis=1.75,xaxt='n')
axis(1,at=seq(0,1.8*10^7,3*10^6),labels = seq(0,18,3),cex.axis=1.75)
lines(selSite, max.likes[grep("mig\\d+",names(max.likes))],cex=2, col = "#d7191c",#red
        type = "b")
lines(selSite, max.likes[grep("sv\\d+",names(max.likes))],cex=2, col = "#2b83ba",#blue
        type = "b")
lines(selSite, max.likes[grep("migInd",names(max.likes))],cex=2,#orange
        col = "#fdae61", lty = 2, type = "b")
lines(selSite, max.likes[grep("svInd",names(max.likes))],cex=2,#green
        col = "#91cf60", lty = 2, type = "b")
legend("topright", col = c("black", "#d7191c", "#2b83ba", "#fdae61", "#abdda4"),
           lty = c(rep(1, 3), rep(2, 2)), sapply(1 : 5, function(i) paste("Model", i)),
       bty='n',cex=1.5,lwd=2)
mtext("Composite Log Likelihood",2,line=3.75,cex=1.5*0.75)
mtext("Position on LG4 (Mb)",1,line=2.2,cex=1.5*0.75)
abline(v=mcle_ind[1],lty=2,lwd=2,col="black",xpd=FALSE)
abline(v=mcle_sv[1],lty=2,lwd=2,col="#2b83ba",xpd=FALSE)
abline(v=mcle_mixed_svInd[1],lty=2,lwd=2,col="#91cf60",xpd=FALSE)
  
### add legend
#create the legend text, and parameters
lgnd<-unlist(lapply(names(fst.dfs),function(n) { bquote(.(n)~italic(F[ST])) }))
lgnd<-c(lgnd,expression(delta~-divergence),"Jost's D","gene regions",
        expression("Shared"~italic(F[ST])~"outlier"),"Salinity-associated")
pchs<-c(rep(32,6),15,124,25)
cols<-c(grp.colors[1],grp.colors[2],grp.colors[3],grp.colors[6],
        "#dfc27d","#a6611a","indianred","black","black")
ltys=c(1,1,2,1,1,1,0,0,0)
lwds<-c(rep(2,6),0,0,1)
#plot the legend
par(fig=c(0, 1, 0, 1), oma=c(0, 0, 0, 0), mar=c(0, 0, 0, 0), new=TRUE,xpd=TRUE)
plot(c(0,1),c(0,1),bty='n',type='n',xlab="",ylab="",xaxt='n',yaxt='n')
legend("top",legend=lgnd,pch=pchs,ncol=5,
       bty='n',lwd=lwds,lty=ltys,
       col=cols,xpd=TRUE,cex=1.5,
       x.intersp = 0.5,y.intersp = 0.75)
dev.off()
## png 
##   2
LG 4 Figure

LG 4 Figure

The loci that are the most likely targets of selection in models 3 and 5 are located at \(4.43784\times 10^{6}\) and \(4.645842\times 10^{6}\), respectively. Let’s use the gff file to annotate them.

#read in the gff if necessary
if(!("gff" %in% ls())){
  if(length(grep("gz",gff.name))>0){
    gff<-read.delim(gzfile(paste("../../scovelli_genome/",gff.name,sep="")),header=F)
  } else{
    gff<-read.delim(paste("../../scovelli_genome/",gff.name,sep=""),header=F)
  }
  colnames(gff)<-c("seqname","source","feature","start","end","score","strand","frame","attribute")
}
#get the blast matches
genome.blast<-read.csv("../../scovelli_genome/ssc_2016_12_20_cds_nr_blast_results.csv",skip=1)

#where are the loci?
gff[gff$seqname=="LG4" & gff$start <= mcle_sv[1] & gff$end >= mcle_sv[1],]
##        seqname source feature   start      end score strand frame
## 321376     LG4      .  contig 3641993 10355651     .      .     .
##                            attribute
## 321376 ID=scaffold_0;Name=scaffold_0
gff[gff$seqname=="LG4" & gff$start <= mcle_mixed_svInd[1] & gff$end >= mcle_mixed_svInd[1],]
##        seqname source feature   start      end score strand frame
## 321376     LG4      .  contig 3641993 10355651     .      .     .
##                            attribute
## 321376 ID=scaffold_0;Name=scaffold_0
dmc.genes<-data.frame(Locus.ID=c("Mod3","Mod5"),Chr=c("LG4","LG4"),BP=c(mcle_sv[1],mcle_mixed_svInd[1]),Column=c(1,1)) #format it appropriately
dmc.ann<-sig.region.ann(dmc.genes,gff,genome.blast)

kable(dmc.ann[,c("Locus","description")],row.names = FALSE)
Locus description
Mod3 NA
Mod5 NA

Finally, I will write the putative gene regions to a file to be supplemental table 2.

#Provide more informative names
colnames(put.reg)<-c("Gene","Function","Chromosome_CitedSpeices","CitedSpecies","Citation","Scovelli_geneID",
                     "My_Annotation_Notes","Scovelli_Chrom","Scovelli_StartBP","Scovelli_StopBP",
                     "Contains_RAD_locus","Near_RAD_locus","Contains_DeltaDivergence","Near_DeltaDivergence",
                     "Contains_BayenvSalinity","Contains_BayenvTemp","Contains_BayenvSeagrass",
                     "Near_JostD","Contains_JostD","Contains_Fst")
write.table(put.reg,"putative_genes_annotated.txt",sep='\t',col.names = TRUE,row.names = FALSE,quote=FALSE)